Added support for IMIX through config and command line
[samplevnf.git] / VNFs / DPPD-PROX / handle_swap.c
1 /*
2 // Copyright (c) 2010-2020 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <rte_mbuf.h>
18 #include <rte_udp.h>
19
20 #include "task_init.h"
21 #include "task_base.h"
22 #include "lconf.h"
23 #include "log.h"
24 #include "prox_port_cfg.h"
25 #include "mpls.h"
26 #include "qinq.h"
27 #include "gre.h"
28 #include "prefetch.h"
29 #include "defines.h"
30 #include "igmp.h"
31 #include "prox_cksum.h"
32 #include "prox_compat.h"
33
34 struct task_swap {
35         struct task_base base;
36         struct rte_mempool *igmp_pool;
37         uint32_t runtime_flags;
38         uint32_t igmp_address;
39         uint8_t src_dst_mac[12];
40         uint32_t local_ipv4;
41         int offload_crc;
42         uint64_t last_echo_req_rcvd_tsc;
43         uint64_t last_echo_rep_rcvd_tsc;
44         uint32_t n_echo_req;
45         uint32_t n_echo_rep;
46 };
47
/* Sizing of the mempool used for locally generated IGMP messages */
#define NB_IGMP_MBUF		1024	/* number of elements in the pool */
#define IGMP_MBUF_SIZE		2048	/* size of one pool element, in bytes */
#define NB_CACHE_IGMP_MBUF	256	/* cache size handed to rte_mempool_create() */
51
52 static void write_src_and_dst_mac(struct task_swap *task, struct rte_mbuf *mbuf)
53 {
54         prox_rte_ether_hdr *hdr;
55         prox_rte_ether_addr mac;
56
57         if (unlikely((task->runtime_flags & (TASK_ARG_DST_MAC_SET|TASK_ARG_SRC_MAC_SET)) == (TASK_ARG_DST_MAC_SET|TASK_ARG_SRC_MAC_SET))) {
58                 /* Source and Destination mac hardcoded */
59                 hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
60                 rte_memcpy(hdr, task->src_dst_mac, sizeof(task->src_dst_mac));
61         } else {
62                 hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
63                 if (likely((task->runtime_flags & TASK_ARG_SRC_MAC_SET) == 0)) {
64                         /* dst mac will be used as src mac */
65                         prox_rte_ether_addr_copy(&hdr->d_addr, &mac);
66                 }
67
68                 if (unlikely(task->runtime_flags & TASK_ARG_DST_MAC_SET))
69                         prox_rte_ether_addr_copy((prox_rte_ether_addr *)&task->src_dst_mac[0], &hdr->d_addr);
70                 else
71                         prox_rte_ether_addr_copy(&hdr->s_addr, &hdr->d_addr);
72
73                 if (unlikely(task->runtime_flags & TASK_ARG_SRC_MAC_SET)) {
74                         prox_rte_ether_addr_copy((prox_rte_ether_addr *)&task->src_dst_mac[6], &hdr->s_addr);
75                 } else {
76                         prox_rte_ether_addr_copy(&mac, &hdr->s_addr);
77                 }
78         }
79 }
80 static inline void build_mcast_mac(uint32_t ip, prox_rte_ether_addr *dst_mac)
81 {
82         // MAC address is 01:00:5e followed by 23 LSB of IP address
83         uint64_t mac = 0x0000005e0001L | ((ip & 0xFFFF7F00L) << 16);
84         memcpy(dst_mac, &mac, sizeof(prox_rte_ether_addr));
85 }
86
87 static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_mbuf *mbuf)
88 {
89         struct task_swap *task = (struct task_swap *)tbase;
90         prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
91         prox_rte_ether_addr dst_mac;
92         prox_rte_ether_addr_copy(&hdr->s_addr, &dst_mac);
93         prox_rte_ether_addr_copy(&hdr->d_addr, &hdr->s_addr);
94         prox_rte_ether_addr_copy(&dst_mac, &hdr->d_addr);
95         prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
96         ip_hdr->dst_addr = ip_hdr->src_addr;
97         ip_hdr->src_addr = task->local_ipv4;
98         prox_rte_icmp_hdr *picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1);
99         picmp->icmp_type = PROX_RTE_IP_ICMP_ECHO_REPLY;
100 }
101
102 static inline void build_igmp_message(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t ip, uint8_t igmp_message)
103 {
104         struct task_swap *task = (struct task_swap *)tbase;
105         prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
106         prox_rte_ether_addr dst_mac;
107         build_mcast_mac(ip, &dst_mac);
108
109         rte_pktmbuf_pkt_len(mbuf) = 46;
110         rte_pktmbuf_data_len(mbuf) = 46;
111         init_mbuf_seg(mbuf);
112
113         prox_rte_ether_addr_copy(&dst_mac, &hdr->d_addr);
114         prox_rte_ether_addr_copy((prox_rte_ether_addr *)&task->src_dst_mac[6], &hdr->s_addr);
115         hdr->ether_type = ETYPE_IPv4;
116
117         prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
118         ip_hdr->version_ihl = 0x45;             /**< version and header length */
119         ip_hdr->type_of_service = 0;    /**< type of service */
120         ip_hdr->total_length = rte_cpu_to_be_16(32);            /**< length of packet */
121         ip_hdr->packet_id = 0;          /**< packet ID */
122         ip_hdr->fragment_offset = 0;    /**< fragmentation offset */
123         ip_hdr->time_to_live = 1;               /**< time to live */
124         ip_hdr->next_proto_id = IPPROTO_IGMP;           /**< protocol ID */
125         ip_hdr->hdr_checksum = 0;               /**< header checksum */
126         ip_hdr->src_addr = task->local_ipv4;            /**< source address */
127         ip_hdr->dst_addr = ip;  /**< destination address */
128         struct igmpv2_hdr *pigmp = (struct igmpv2_hdr *)(ip_hdr + 1);
129         pigmp->type = igmp_message;
130         pigmp->max_resp_time = 0;
131         pigmp->checksum = 0;
132         pigmp->group_address = ip;
133         prox_ip_udp_cksum(mbuf, ip_hdr, sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr), task->offload_crc);
134 }
135
136 static void stop_swap(struct task_base *tbase)
137 {
138         struct task_swap *task = (struct task_swap *)tbase;
139         if (task->igmp_pool) {
140                 rte_mempool_free(task->igmp_pool);
141                 task->igmp_pool = NULL;
142         }
143 }
144
145 static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
146 {
147         struct task_swap *task = (struct task_swap *)tbase;
148         prox_rte_ether_hdr *hdr;
149         prox_rte_ether_addr mac;
150         prox_rte_ipv4_hdr *ip_hdr;
151         prox_rte_udp_hdr *udp_hdr;
152         struct gre_hdr *pgre;
153         prox_rte_ipv4_hdr *inner_ip_hdr;
154         uint32_t ip;
155         uint16_t port;
156         uint8_t out[64] = {0};
157         struct mpls_hdr *mpls;
158         uint32_t mpls_len = 0;
159         struct qinq_hdr *qinq;
160         prox_rte_vlan_hdr *vlan;
161         uint16_t j;
162         struct igmpv2_hdr *pigmp;
163         prox_rte_icmp_hdr *picmp;
164         uint8_t type;
165
166         for (j = 0; j < n_pkts; ++j) {
167                 PREFETCH0(mbufs[j]);
168         }
169         for (j = 0; j < n_pkts; ++j) {
170                 PREFETCH0(rte_pktmbuf_mtod(mbufs[j], void *));
171         }
172
173         // TODO 1: check packet is long enough for Ethernet + IP + UDP = 42 bytes
174         for (uint16_t j = 0; j < n_pkts; ++j) {
175                 hdr = rte_pktmbuf_mtod(mbufs[j], prox_rte_ether_hdr *);
176                 switch (hdr->ether_type) {
177                 case ETYPE_MPLSU:
178                         mpls = (struct mpls_hdr *)(hdr + 1);
179                         while (!(mpls->bytes & 0x00010000)) {
180                                 // TODO: verify pcket length
181                                 mpls++;
182                                 mpls_len += sizeof(struct mpls_hdr);
183                         }
184                         mpls_len += sizeof(struct mpls_hdr);
185                         ip_hdr = (prox_rte_ipv4_hdr *)(mpls + 1);
186                         break;
187                 case ETYPE_8021ad:
188                         qinq = (struct qinq_hdr *)hdr;
189                         if (qinq->cvlan.eth_proto != ETYPE_VLAN) {
190                                 plog_warn("Unexpected proto in QinQ = %#04x\n", qinq->cvlan.eth_proto);
191                                 out[j] = OUT_DISCARD;
192                                 continue;
193                         }
194                         ip_hdr = (prox_rte_ipv4_hdr *)(qinq + 1);
195                         break;
196                 case ETYPE_VLAN:
197                         vlan = (prox_rte_vlan_hdr *)(hdr + 1);
198                         if (vlan->eth_proto == ETYPE_IPv4) {
199                                 ip_hdr = (prox_rte_ipv4_hdr *)(vlan + 1);
200                         } else if (vlan->eth_proto == ETYPE_VLAN) {
201                                 vlan = (prox_rte_vlan_hdr *)(vlan + 1);
202                                 if (vlan->eth_proto == ETYPE_IPv4) {
203                                         ip_hdr = (prox_rte_ipv4_hdr *)(vlan + 1);
204                                 }
205                                 else if (vlan->eth_proto == ETYPE_IPv6) {
206                                         plog_warn("Unsupported IPv6\n");
207                                         out[j] = OUT_DISCARD;
208                                         continue;
209                                 }
210                                 else {
211                                         plog_warn("Unsupported packet type\n");
212                                         out[j] = OUT_DISCARD;
213                                         continue;
214                                 }
215                         } else {
216                                 plog_warn("Unsupported packet type\n");
217                                 out[j] = OUT_DISCARD;
218                                 continue;
219                         }
220                         break;
221                 case ETYPE_IPv4:
222                         ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
223                         break;
224                 case ETYPE_IPv6:
225                         plog_warn("Unsupported IPv6\n");
226                         out[j] = OUT_DISCARD;
227                         continue;
228                 case ETYPE_LLDP:
229                         out[j] = OUT_DISCARD;
230                         continue;
231                 default:
232                         plog_warn("Unsupported ether_type 0x%x\n", hdr->ether_type);
233                         out[j] = OUT_DISCARD;
234                         continue;
235                 }
236                 // TODO 2 : check packet is long enough for Ethernet + IP + UDP + extra header (VLAN, MPLS, ...)
237                 ip = ip_hdr->dst_addr;
238
239                 switch (ip_hdr->next_proto_id) {
240                 case IPPROTO_GRE:
241                         ip_hdr->dst_addr = ip_hdr->src_addr;
242                         ip_hdr->src_addr = ip;
243
244                         pgre = (struct gre_hdr *)(ip_hdr + 1);
245                         inner_ip_hdr = ((prox_rte_ipv4_hdr *)(pgre + 1));
246                         ip = inner_ip_hdr->dst_addr;
247                         inner_ip_hdr->dst_addr = inner_ip_hdr->src_addr;
248                         inner_ip_hdr->src_addr = ip;
249
250                         udp_hdr = (prox_rte_udp_hdr *)(inner_ip_hdr + 1);
251                         // TODO 3.1 : verify proto is UPD or TCP
252                         port = udp_hdr->dst_port;
253                         udp_hdr->dst_port = udp_hdr->src_port;
254                         udp_hdr->src_port = port;
255                         write_src_and_dst_mac(task, mbufs[j]);
256                         break;
257                 case IPPROTO_UDP:
258                 case IPPROTO_TCP:
259                         if (task->igmp_address && PROX_RTE_IS_IPV4_MCAST(rte_be_to_cpu_32(ip))) {
260                                 out[j] = OUT_DISCARD;
261                                 continue;
262                         }
263                         udp_hdr = (prox_rte_udp_hdr *)(ip_hdr + 1);
264                         ip_hdr->dst_addr = ip_hdr->src_addr;
265                         ip_hdr->src_addr = ip;
266
267                         port = udp_hdr->dst_port;
268                         udp_hdr->dst_port = udp_hdr->src_port;
269                         udp_hdr->src_port = port;
270                         write_src_and_dst_mac(task, mbufs[j]);
271                         break;
272                 case IPPROTO_ICMP:
273                         picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1);
274                         type = picmp->icmp_type;
275                         if (type == PROX_RTE_IP_ICMP_ECHO_REQUEST) {
276                                 if (ip_hdr->dst_addr == task->local_ipv4) {
277                                         task->n_echo_req++;
278                                         if (rte_rdtsc() - task->last_echo_req_rcvd_tsc > rte_get_tsc_hz()) {
279                                                 plog_info("Received %u Echo Request on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", task->n_echo_req, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr)));
280                                                 task->n_echo_req = 0;
281                                                 task->last_echo_req_rcvd_tsc = rte_rdtsc();
282                                         }
283                                         build_icmp_reply_message(tbase, mbufs[j]);
284                                 } else {
285                                         out[j] = OUT_DISCARD;
286                                         continue;
287                                 }
288                         } else if (type == PROX_RTE_IP_ICMP_ECHO_REPLY) {
289                                 if (ip_hdr->dst_addr == task->local_ipv4) {
290                                         task->n_echo_rep++;
291                                         if (rte_rdtsc() - task->last_echo_rep_rcvd_tsc > rte_get_tsc_hz()) {
292                                                 plog_info("Received %u Echo Reply on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", task->n_echo_rep, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr)));
293                                                 task->n_echo_rep = 0;
294                                                 task->last_echo_rep_rcvd_tsc = rte_rdtsc();
295                                         }
296                                 } else {
297                                         out[j] = OUT_DISCARD;
298                                         continue;
299                                 }
300                         } else {
301                                 out[j] = OUT_DISCARD;
302                                 continue;
303                         }
304                         break;
305                 case IPPROTO_IGMP:
306                         pigmp = (struct igmpv2_hdr *)(ip_hdr + 1);
307                         // TODO: check packet len
308                         type = pigmp->type;
309                         if (type == IGMP_MEMBERSHIP_QUERY) {
310                                 if (task->igmp_address) {
311                                         // We have an address registered
312                                         if ((task->igmp_address == pigmp->group_address) || (pigmp->group_address == 0)) {
313                                                 // We get a request for the registered address, or to 0.0.0.0
314                                                 build_igmp_message(tbase, mbufs[j], task->igmp_address, IGMP_MEMBERSHIP_REPORT);        // replace Membership query packet with a response
315                                         } else {
316                                                 // Discard as either we are not registered or this is a query for a different group
317                                                 out[j] = OUT_DISCARD;
318                                                 continue;
319                                         }
320                                 } else {
321                                         // Discard as either we are not registered
322                                         out[j] = OUT_DISCARD;
323                                         continue;
324                                 }
325                         } else {
326                                 // Do not forward other IGMP packets back
327                                 out[j] = OUT_DISCARD;
328                                 continue;
329                         }
330                         break;
331                 default:
332                         plog_warn("Unsupported IP protocol 0x%x\n", ip_hdr->next_proto_id);
333                         out[j] = OUT_DISCARD;
334                         continue;
335                 }
336         }
337         return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
338 }
339
340 void igmp_join_group(struct task_base *tbase, uint32_t igmp_address)
341 {
342         struct task_swap *task = (struct task_swap *)tbase;
343         struct rte_mbuf *igmp_mbuf;
344         uint8_t out[64] = {0};
345         int ret;
346
347         task->igmp_address = igmp_address;
348         ret = rte_mempool_get(task->igmp_pool, (void **)&igmp_mbuf);
349         if (ret != 0) {
350                 plog_err("Unable to allocate igmp mbuf\n");
351                 return;
352         }
353         build_igmp_message(tbase, igmp_mbuf, task->igmp_address, IGMP_MEMBERSHIP_REPORT);
354         task->base.tx_pkt(&task->base, &igmp_mbuf, 1, out);
355 }
356
357 void igmp_leave_group(struct task_base *tbase)
358 {
359         struct task_swap *task = (struct task_swap *)tbase;
360         struct rte_mbuf *igmp_mbuf;
361         uint8_t out[64] = {0};
362         int ret;
363
364         task->igmp_address = 0;
365         ret = rte_mempool_get(task->igmp_pool, (void **)&igmp_mbuf);
366         if (ret != 0) {
367                 plog_err("Unable to allocate igmp mbuf\n");
368                 return;
369         }
370         build_igmp_message(tbase, igmp_mbuf, task->igmp_address, IGMP_LEAVE_GROUP);
371         task->base.tx_pkt(&task->base, &igmp_mbuf, 1, out);
372 }
373
374 static void init_task_swap(struct task_base *tbase, struct task_args *targ)
375 {
376         struct task_swap *task = (struct task_swap *)tbase;
377         prox_rte_ether_addr *src_addr, *dst_addr;
378
379         /*
380          * The destination MAC of the outgoing packet is based on the config file:
381          *    - 'dst mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as dst mac
382          *    - 'dst mac=packet'            => the src mac of the incoming packet is used as dst mac
383          *    - (default - no 'dst mac')    => the src mac from the incoming packet is used as dst mac
384          *
385          * The source MAC of the outgoing packet is based on the config file:
386          *    - 'src mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as src mac
387          *    - 'src mac=packet'            => the dst mac of the incoming packet is used as src mac
388          *    - 'src mac=hw'                => the mac address of the tx port is used as src mac
389          *                                     An error is returned if there are no physical tx ports
390          *    - (default - no 'src mac')    => if there is physical tx port, the mac of that port is used as src mac
391          *    - (default - no 'src mac')       if there are no physical tx ports the dst mac of the incoming packet
392          */
393
394         if (targ->flags & TASK_ARG_DST_MAC_SET) {
395                 dst_addr = &targ->edaddr;
396                 memcpy(&task->src_dst_mac[0], dst_addr, sizeof(*src_addr));
397         }
398
399         PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_SRC_MAC, "src mac must be set in swap mode, by definition => src mac=no is not supported\n");
400         PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_DST_MAC, "dst mac must be set in swap mode, by definition => dst mac=no is not supported\n");
401
402         if (targ->flags & TASK_ARG_SRC_MAC_SET) {
403                 src_addr =  &targ->esaddr;
404                 memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
405                 plog_info("\t\tCore %d: src mac set from config file\n", targ->lconf->id);
406         } else {
407                 if (targ->flags & TASK_ARG_HW_SRC_MAC)
408                         PROX_PANIC(targ->nb_txports == 0, "src mac set to hw but no tx port\n");
409                 if (targ->nb_txports) {
410                         src_addr = &prox_port_cfg[task->base.tx_params_hw.tx_port_queue[0].port].eth_addr;
411                         memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
412                         targ->flags |= TASK_ARG_SRC_MAC_SET;
413                         plog_info("\t\tCore %d: src mac set from port\n", targ->lconf->id);
414                 }
415         }
416         task->runtime_flags = targ->flags;
417         task->igmp_address =  rte_cpu_to_be_32(targ->igmp_address);
418         if (task->igmp_pool == NULL) {
419                 static char name[] = "igmp0_pool";
420                 name[4]++;
421                 struct rte_mempool *ret = rte_mempool_create(name, NB_IGMP_MBUF, IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF,
422                         sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
423                         rte_socket_id(), 0);
424                 PROX_PANIC(ret == NULL, "Failed to allocate IGMP memory pool on socket %u with %u elements\n",
425                         rte_socket_id(), NB_IGMP_MBUF);
426                 plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_IGMP_MBUF,
427                         IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF, rte_socket_id());
428                 task->igmp_pool = ret;
429         }
430         task->local_ipv4 = rte_cpu_to_be_32(targ->local_ipv4);
431
432         struct prox_port_cfg *port = find_reachable_port(targ);
433         if (port) {
434                 task->offload_crc = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
435         }
436 }
437
438 static struct task_init task_init_swap = {
439         .mode_str = "swap",
440         .init = init_task_swap,
441         .handle = handle_swap_bulk,
442         .flag_features = 0,
443         .size = sizeof(struct task_swap),
444         .stop_last = stop_swap
445 };
446
447 __attribute__((constructor)) static void reg_task_swap(void)
448 {
449         reg_task(&task_init_swap);
450 }