2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
21 #include "task_init.h"
22 #include "task_base.h"
25 #include "prox_port_cfg.h"
32 #include "prox_cksum.h"
33 #include "prox_compat.h"
// Size in bytes of the buf[] array that holds one stored packet.
35 #define MAX_STORE_PKT_SIZE 2048
// Raw bytes of one stored packet; stop_swap() hex-dumps them.
// NOTE(review): the enclosing struct header and its len member are not visible in this listing.
39 unsigned char buf[MAX_STORE_PKT_SIZE];
// Per-task state of the swap task (the struct header is not visible in this listing).
// Generic task state; handle_swap_bulk() transmits through base.tx_pkt.
43 struct task_base base;
// Mempool created in init_task_swap(); the IGMP join/leave helpers take their mbuf from it
// and stop_swap() frees it.
44 struct rte_mempool *igmp_pool;
// Copy of targ->flags taken in init_task_swap(); tested against the TASK_ARG_*_MAC_SET bits.
45 uint32_t runtime_flags;
// Multicast group of the task, stored after rte_cpu_to_be_32(); tested as a boolean, 0 = none.
46 uint32_t igmp_address;
// Bytes 0-5: configured destination MAC, bytes 6-11: source MAC (filled in init_task_swap()).
47 uint8_t src_dst_mac[12];
// TSC values recorded when the Echo Request / Echo Reply log line was last printed.
50 uint64_t last_echo_req_rcvd_tsc;
51 uint64_t last_echo_rep_rcvd_tsc;
// Running count of stored packets; masked with store_msk to select the store_buf slot.
54 uint32_t store_pkt_id;
// Packet store, malloc'ed in init_task_swap() when targ->store_max is non-zero.
56 struct packet *store_buf;
// Arguments given to rte_mempool_create() for the IGMP pool:
// number of elements, element size and cache size.
60 #define NB_IGMP_MBUF 1024
61 #define IGMP_MBUF_SIZE 2048
62 #define NB_CACHE_IGMP_MBUF 256
64 static void write_src_and_dst_mac(struct task_swap *task, struct rte_mbuf *mbuf)
66 prox_rte_ether_hdr *hdr;
67 prox_rte_ether_addr mac;
69 if (unlikely((task->runtime_flags & (TASK_ARG_DST_MAC_SET|TASK_ARG_SRC_MAC_SET)) == (TASK_ARG_DST_MAC_SET|TASK_ARG_SRC_MAC_SET))) {
70 /* Source and Destination mac hardcoded */
71 hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
72 rte_memcpy(hdr, task->src_dst_mac, sizeof(task->src_dst_mac));
74 hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
75 if (unlikely((task->runtime_flags & TASK_ARG_SRC_MAC_SET) == 0)) {
76 /* dst mac will be used as src mac */
77 prox_rte_ether_addr_copy(&hdr->d_addr, &mac);
80 if (unlikely(task->runtime_flags & TASK_ARG_DST_MAC_SET))
81 prox_rte_ether_addr_copy((prox_rte_ether_addr *)&task->src_dst_mac[0], &hdr->d_addr);
83 prox_rte_ether_addr_copy(&hdr->s_addr, &hdr->d_addr);
85 if (likely(task->runtime_flags & TASK_ARG_SRC_MAC_SET)) {
86 prox_rte_ether_addr_copy((prox_rte_ether_addr *)&task->src_dst_mac[6], &hdr->s_addr);
88 prox_rte_ether_addr_copy(&mac, &hdr->s_addr);
92 static inline void build_mcast_mac(uint32_t ip, prox_rte_ether_addr *dst_mac)
94 // MAC address is 01:00:5e followed by 23 LSB of IP address
95 uint64_t mac = 0x0000005e0001L | ((ip & 0xFFFF7F00L) << 16);
96 memcpy(dst_mac, &mac, sizeof(prox_rte_ether_addr));
99 static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_mbuf *mbuf)
101 struct task_swap *task = (struct task_swap *)tbase;
102 prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
103 prox_rte_ether_addr dst_mac;
104 prox_rte_ether_addr_copy(&hdr->s_addr, &dst_mac);
105 prox_rte_ether_addr_copy(&hdr->d_addr, &hdr->s_addr);
106 prox_rte_ether_addr_copy(&dst_mac, &hdr->d_addr);
107 prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
108 ip_hdr->dst_addr = ip_hdr->src_addr;
109 ip_hdr->src_addr = task->local_ipv4;
110 prox_rte_icmp_hdr *picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1);
111 picmp->icmp_type = PROX_RTE_IP_ICMP_ECHO_REPLY;
// Fill mbuf with an IGMP message for multicast group ip.
// tbase        - swap task; supplies the source MAC (src_dst_mac[6..11]), the
//                source IPv4 address (local_ipv4) and the checksum offload setting
// mbuf         - packet buffer that is overwritten from its data start
// ip           - group address; written unchanged to the IPv4 destination and to
//                the IGMP group field, and used to derive the destination MAC
// igmp_message - value stored in the IGMP type field
// NOTE(review): two statements of this function are not visible in this listing
// (after the length assignments and before the group address store); in
// particular no store to the IGMP checksum field can be seen here - confirm.
114 static inline void build_igmp_message(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t ip, uint8_t igmp_message)
116 struct task_swap *task = (struct task_swap *)tbase;
117 prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
118 prox_rte_ether_addr dst_mac;
119 build_mcast_mac(ip, &dst_mac);
// Fixed frame length of 46 bytes - presumably 14 bytes of Ethernet header plus
// the IPv4 total_length of 32 set below
121 rte_pktmbuf_pkt_len(mbuf) = 46;
122 rte_pktmbuf_data_len(mbuf) = 46;
// Ethernet header: multicast destination derived from the group, source from the task.
// NOTE(review): src_dst_mac[6..11] is used whether or not a source MAC was
// configured for the task - confirm it is always filled in.
125 prox_rte_ether_addr_copy(&dst_mac, &hdr->d_addr);
126 prox_rte_ether_addr_copy((prox_rte_ether_addr *)&task->src_dst_mac[6], &hdr->s_addr);
127 hdr->ether_type = ETYPE_IPv4;
// IPv4 header directly behind the Ethernet header, no options (version_ihl 0x45)
129 prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
130 ip_hdr->version_ihl = 0x45; /**< version and header length */
131 ip_hdr->type_of_service = 0; /**< type of service */
132 ip_hdr->total_length = rte_cpu_to_be_16(32); /**< length of packet */
133 ip_hdr->packet_id = 0; /**< packet ID */
134 ip_hdr->fragment_offset = 0; /**< fragmentation offset */
135 ip_hdr->time_to_live = 1; /**< time to live */
136 ip_hdr->next_proto_id = IPPROTO_IGMP; /**< protocol ID */
137 ip_hdr->hdr_checksum = 0; /**< header checksum */
138 ip_hdr->src_addr = task->local_ipv4; /**< source address */
139 ip_hdr->dst_addr = ip; /**< destination address */
// IGMP header directly behind the IPv4 header
140 struct igmpv2_hdr *pigmp = (struct igmpv2_hdr *)(ip_hdr + 1);
141 pigmp->type = igmp_message;
142 pigmp->max_resp_time = 0;
144 pigmp->group_address = ip;
// Checksum handling is delegated to prox_ip_udp_cksum(), given the L2 and L3
// header lengths and the task's offload setting
145 prox_ip_udp_cksum(mbuf, ip_hdr, sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr), task->offload_crc);
148 static void stop_swap(struct task_base *tbase)
151 struct task_swap *task = (struct task_swap *)tbase;
153 if (task->igmp_pool) {
154 rte_mempool_free(task->igmp_pool);
155 task->igmp_pool = NULL;
158 if (task->store_msk) {
159 for (i = task->store_pkt_id & task->store_msk; i < task->store_msk + 1; i++) {
160 if (task->store_buf[i].len) {
161 fprintf(task->fp, "%06d: ", i);
162 for (j = 0; j < task->store_buf[i].len; j++) {
163 fprintf(task->fp, "%02x ", task->store_buf[i].buf[j]);
165 fprintf(task->fp, "\n");
168 for (i = 0; i < (task->store_pkt_id & task->store_msk); i++) {
169 if (task->store_buf[i].len) {
170 fprintf(task->fp, "%06d: ", i);
171 for (j = 0; j < task->store_buf[i].len; j++) {
172 fprintf(task->fp, "%02x ", task->store_buf[i].buf[j]);
174 fprintf(task->fp, "\n");
// Swap source and destination of one IPv6 packet in place.
// task     - swap task, passed on to write_src_and_dst_mac()
// mbufs    - the single packet being processed
// ipv6_hdr - IPv6 header inside that packet
// out      - output slot of this packet, owned by the caller
// The IPv6 addresses are always swapped; for the protocols selected by the
// switch below the L4 ports are swapped too and the Ethernet addresses are
// rewritten.
// NOTE(review): the case labels of the switch and any store to *out are not
// visible in this listing; presumably the two warning paths mark the packet as
// discarded - confirm.
180 static void handle_ipv6(struct task_swap *task, struct rte_mbuf *mbufs, prox_rte_ipv6_hdr *ipv6_hdr, uint8_t *out)
// Keep a copy of the 16-byte source address before it is overwritten
182 __m128i ip = _mm_loadu_si128((__m128i*)&(ipv6_hdr->src_addr));
184 uint16_t payload_len;
185 prox_rte_udp_hdr *udp_hdr;
187 rte_mov16((uint8_t *)&(ipv6_hdr->src_addr), (uint8_t *)&(ipv6_hdr->dst_addr)); // Copy dst into src
188 rte_mov16((uint8_t *)&(ipv6_hdr->dst_addr), (uint8_t *)&ip); // Copy src into dst
189 switch(ipv6_hdr->proto) {
// The L4 header is taken to start right behind the fixed IPv6 header
// (no extension headers are walked)
192 payload_len = ipv6_hdr->payload_len;
193 udp_hdr = (prox_rte_udp_hdr *)(ipv6_hdr + 1);
// NOTE(review): both lengths are used exactly as loaded from the packet, with no
// byte-order conversion visible; on a little-endian host the comparison and the
// logged values would then be byte-swapped - confirm.
194 if (unlikely(udp_hdr->dgram_len < payload_len)) {
195 plog_warn("Unexpected L4 len (%u) versus L3 payload len (%u) in IPv6 packet\n", udp_hdr->dgram_len, payload_len);
// Swap the L4 ports, then rewrite the Ethernet addresses
199 port = udp_hdr->dst_port;
200 udp_hdr->dst_port = udp_hdr->src_port;
201 udp_hdr->src_port = port;
202 write_src_and_dst_mac(task, mbufs);
// The value logged here is the proto field of the IPv6 header
206 plog_warn("Unsupported next hop %u in IPv6 packet\n", ipv6_hdr->proto);
// Bulk handler of the swap task.
// tbase  - the swap task
// mbufs  - burst of received packets, modified in place
// n_pkts - number of packets in mbufs
// For every packet the L3 header is located behind the Ethernet header (and
// behind the MPLS, QinQ or VLAN encapsulation handled by the first switch), IP
// addresses and L4 ports are swapped, and the Ethernet addresses are rewritten.
// IPv6 packets are delegated to handle_ipv6(). ICMP echo messages and IGMP
// membership queries get dedicated handling in the second switch.
// The burst is finally handed to task->base.tx_pkt() together with the
// per-packet out[] values; the return value of tx_pkt() is returned.
// NOTE(review): the case labels and the break/continue statements of both
// switches, and a few declarations (j, ip, port, type), are not visible in this
// listing; the comments below only describe the statements that are.
212 static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
214 struct task_swap *task = (struct task_swap *)tbase;
215 prox_rte_ether_hdr *hdr;
// NOTE(review): no use of mac is visible in this function
216 prox_rte_ether_addr mac;
217 prox_rte_ipv4_hdr *ip_hdr;
218 prox_rte_udp_hdr *udp_hdr;
219 prox_rte_ipv6_hdr *ipv6_hdr;
220 struct gre_hdr *pgre;
221 prox_rte_ipv4_hdr *inner_ip_hdr;
// Per-packet output values passed to tx_pkt(); zero-initialised, OUT_DISCARD marks a drop.
// NOTE(review): sized for 64 packets while n_pkts is a uint16_t - presumably the
// caller never passes more than 64, confirm.
224 uint8_t out[64] = {0};
225 struct mpls_hdr *mpls;
// NOTE(review): initialised once per call and only incremented below, so it
// accumulates over all MPLS packets of the burst; no reader is visible here
226 uint32_t mpls_len = 0;
227 struct qinq_hdr *qinq;
228 prox_rte_vlan_hdr *vlan;
230 struct igmpv2_hdr *pigmp;
231 prox_rte_icmp_hdr *picmp;
// Function-static flags; presumably they limit the "only printed once" messages
// below (the stores to them are not visible in this listing)
233 static int llc_printed = 0;
234 static int lldp_printed = 0;
236 for (j = 0; j < n_pkts; ++j) {
// Prefetch the packet data of the whole burst before any header is touched
239 for (j = 0; j < n_pkts; ++j) {
240 PREFETCH0(rte_pktmbuf_mtod(mbufs[j], void *));
243 // TODO 1: check packet is long enough for Ethernet + IP + UDP = 42 bytes
// NOTE(review): this declaration shadows the j used by the loops above
244 for (uint16_t j = 0; j < n_pkts; ++j) {
245 hdr = rte_pktmbuf_mtod(mbufs[j], prox_rte_ether_hdr *);
// First switch: find the IP header depending on the EtherType
246 switch (hdr->ether_type) {
// MPLS: walk the label stack until a label with bit 0x00010000 set is found
// (presumably the bottom-of-stack flag); the IP header follows that label
248 mpls = (struct mpls_hdr *)(hdr + 1);
249 while (!(mpls->bytes & 0x00010000)) {
250 // TODO: verify packet length
252 mpls_len += sizeof(struct mpls_hdr);
254 mpls_len += sizeof(struct mpls_hdr);
255 ip_hdr = (prox_rte_ipv4_hdr *)(mpls + 1);
// A version nibble of 6 behind the labels means the payload is IPv6
256 if (unlikely((ip_hdr->version_ihl >> 4) == 6)) {
257 ipv6_hdr = (prox_rte_ipv6_hdr *)(ip_hdr);
258 handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]);
// QinQ: struct qinq_hdr overlays the frame from the Ethernet header on; the
// inner tag has to be a VLAN tag
263 qinq = (struct qinq_hdr *)hdr;
264 if (qinq->cvlan.eth_proto != ETYPE_VLAN) {
265 plog_warn("Unexpected proto in QinQ = %#04x\n", qinq->cvlan.eth_proto);
266 out[j] = OUT_DISCARD;
269 if (qinq->ether_type == ETYPE_IPv4) {
270 ip_hdr = (prox_rte_ipv4_hdr *)(qinq + 1);
271 } else if (qinq->ether_type == ETYPE_IPv6) {
272 ipv6_hdr = (prox_rte_ipv6_hdr *)(qinq + 1);
273 handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]);
276 plog_warn("Unsupported packet type\n");
277 out[j] = OUT_DISCARD;
// One VLAN tag, optionally followed by a second one
282 vlan = (prox_rte_vlan_hdr *)(hdr + 1);
283 if (vlan->eth_proto == ETYPE_IPv4) {
284 ip_hdr = (prox_rte_ipv4_hdr *)(vlan + 1);
285 } else if (vlan->eth_proto == ETYPE_IPv6) {
286 ipv6_hdr = (prox_rte_ipv6_hdr *)(vlan + 1);
287 handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]);
289 } else if (vlan->eth_proto == ETYPE_VLAN) {
290 vlan = (prox_rte_vlan_hdr *)(vlan + 1);
291 if (vlan->eth_proto == ETYPE_IPv4) {
292 ip_hdr = (prox_rte_ipv4_hdr *)(vlan + 1);
294 else if (vlan->eth_proto == ETYPE_IPv6) {
295 ipv6_hdr = (prox_rte_ipv6_hdr *)(vlan + 1);
296 handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]);
300 plog_warn("Unsupported packet type\n");
301 out[j] = OUT_DISCARD;
305 plog_warn("Unsupported packet type\n");
306 out[j] = OUT_DISCARD;
// Untagged IPv4: the IP header follows the Ethernet header directly
311 ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
// Untagged IPv6
314 ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr + 1);
315 handle_ipv6(task, mbufs[j], ipv6_hdr, &out[j]);
// LLDP frames are dropped
319 plog_info("Discarding LLDP packets (only printed once)\n");
322 out[j] = OUT_DISCARD;
// Byte-swapped type values in [16, 0x600) are inspected as LLC frames
// (presumably an 802.3 length field rather than an EtherType)
325 if ((rte_bswap16(hdr->ether_type) < 0x600) && (rte_bswap16(hdr->ether_type) >= 16)) {
332 struct prox_llc *llc = (struct prox_llc *)(hdr + 1);
// dsap and lsap both 0x42: reported as STP by the message below and dropped
333 if ((llc->dsap == 0x42) && (llc->lsap == 0x42)) {
335 out[j] = OUT_DISCARD;
337 plog_info("Discarding STP packets (only printed once)\n");
343 plog_warn("Unsupported ether_type 0x%x\n", hdr->ether_type);
344 out[j] = OUT_DISCARD;
347 // TODO 2 : check packet is long enough for Ethernet + IP + UDP + extra header (VLAN, MPLS, ...)
// Remember the original destination address; it becomes the new source below
350 ip = ip_hdr->dst_addr;
// Anything that is not IP version 4 at this point is dropped
351 if (unlikely((ip_hdr->version_ihl >> 4) != 4)) {
352 out[j] = OUT_DISCARD;
// Second switch: per-protocol handling of the IPv4 packet
356 switch (ip_hdr->next_proto_id) {
// GRE: swap the outer addresses ...
358 ip_hdr->dst_addr = ip_hdr->src_addr;
359 ip_hdr->src_addr = ip;
// ... then step over the GRE header and swap the inner addresses and ports.
// The headers are located with pointer + 1, i.e. a fixed-size GRE header and
// IPv4 headers without options are assumed
361 pgre = (struct gre_hdr *)(ip_hdr + 1);
362 inner_ip_hdr = ((prox_rte_ipv4_hdr *)(pgre + 1));
363 ip = inner_ip_hdr->dst_addr;
364 inner_ip_hdr->dst_addr = inner_ip_hdr->src_addr;
365 inner_ip_hdr->src_addr = ip;
367 udp_hdr = (prox_rte_udp_hdr *)(inner_ip_hdr + 1);
368 // TODO 3.1 : verify proto is UDP or TCP
369 port = udp_hdr->dst_port;
370 udp_hdr->dst_port = udp_hdr->src_port;
371 udp_hdr->src_port = port;
372 write_src_and_dst_mac(task, mbufs[j]);
// While an IGMP group is configured, packets sent to a multicast address are
// dropped instead of being sent back
376 if (unlikely(task->igmp_address && PROX_RTE_IS_IPV4_MCAST(rte_be_to_cpu_32(ip)))) {
377 out[j] = OUT_DISCARD;
// Swap addresses, then L4 ports, then rewrite the Ethernet addresses
380 udp_hdr = (prox_rte_udp_hdr *)(ip_hdr + 1);
381 ip_hdr->dst_addr = ip_hdr->src_addr;
382 ip_hdr->src_addr = ip;
384 port = udp_hdr->dst_port;
385 udp_hdr->dst_port = udp_hdr->src_port;
386 udp_hdr->src_port = port;
387 write_src_and_dst_mac(task, mbufs[j]);
// ICMP: only echo messages addressed to task->local_ipv4 are of interest
390 picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1);
391 type = picmp->icmp_type;
392 if (type == PROX_RTE_IP_ICMP_ECHO_REQUEST) {
393 if (ip_hdr->dst_addr == task->local_ipv4) {
// Log a summary when more than rte_get_tsc_hz() TSC cycles have passed since
// the previous one, then restart the counter
395 if (rte_rdtsc() - task->last_echo_req_rcvd_tsc > rte_get_tsc_hz()) {
396 plog_info("Received %u Echo Request on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", task->n_echo_req, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr)));
397 task->n_echo_req = 0;
398 task->last_echo_req_rcvd_tsc = rte_rdtsc();
// Convert the request into a reply in place
400 build_icmp_reply_message(tbase, mbufs[j]);
402 out[j] = OUT_DISCARD;
405 } else if (type == PROX_RTE_IP_ICMP_ECHO_REPLY) {
406 if (ip_hdr->dst_addr == task->local_ipv4) {
408 if (rte_rdtsc() - task->last_echo_rep_rcvd_tsc > rte_get_tsc_hz()) {
409 plog_info("Received %u Echo Reply on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", task->n_echo_rep, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr)));
410 task->n_echo_rep = 0;
411 task->last_echo_rep_rcvd_tsc = rte_rdtsc();
414 out[j] = OUT_DISCARD;
418 out[j] = OUT_DISCARD;
// IGMP: answer membership queries for the configured group, drop the rest
423 pigmp = (struct igmpv2_hdr *)(ip_hdr + 1);
424 // TODO: check packet len
426 if (type == IGMP_MEMBERSHIP_QUERY) {
427 if (task->igmp_address) {
428 // We have an address registered
429 if ((task->igmp_address == pigmp->group_address) || (pigmp->group_address == 0)) {
430 // We get a request for the registered address, or to 0.0.0.0
431 build_igmp_message(tbase, mbufs[j], task->igmp_address, IGMP_MEMBERSHIP_REPORT); // replace Membership query packet with a response
433 // Discard as either we are not registered or this is a query for a different group
434 out[j] = OUT_DISCARD;
438 // Discard as either we are not registered
439 out[j] = OUT_DISCARD;
443 // Do not forward other IGMP packets back
444 out[j] = OUT_DISCARD;
449 plog_warn("Unsupported IP protocol 0x%x\n", ip_hdr->next_proto_id);
450 out[j] = OUT_DISCARD;
// Optional capture: copy every packet that is not discarded into the next slot
// of the store (slot index = store_pkt_id masked with store_msk).
// NOTE(review): the copy length is the mbuf pkt_len with no visible check
// against MAX_STORE_PKT_SIZE, and only the memory behind rte_pktmbuf_mtod() is
// read - confirm packets can neither exceed a slot nor be multi-segment.
454 if (task->store_msk) {
455 for (int i = 0; i < n_pkts; i++) {
456 if (out[i] != OUT_DISCARD) {
457 hdr = rte_pktmbuf_mtod(mbufs[i], prox_rte_ether_hdr *);
458 memcpy(&task->store_buf[task->store_pkt_id & task->store_msk].buf, hdr, rte_pktmbuf_pkt_len(mbufs[i]));
459 task->store_buf[task->store_pkt_id & task->store_msk].len = rte_pktmbuf_pkt_len(mbufs[i]);
460 task->store_pkt_id++;
// Hand the whole burst, including the discard markers, to the transmit function
464 return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
467 void igmp_join_group(struct task_base *tbase, uint32_t igmp_address)
469 struct task_swap *task = (struct task_swap *)tbase;
470 struct rte_mbuf *igmp_mbuf;
471 uint8_t out[64] = {0};
474 task->igmp_address = igmp_address;
475 ret = rte_mempool_get(task->igmp_pool, (void **)&igmp_mbuf);
477 plog_err("Unable to allocate igmp mbuf\n");
480 build_igmp_message(tbase, igmp_mbuf, task->igmp_address, IGMP_MEMBERSHIP_REPORT);
481 task->base.tx_pkt(&task->base, &igmp_mbuf, 1, out);
484 void igmp_leave_group(struct task_base *tbase)
486 struct task_swap *task = (struct task_swap *)tbase;
487 struct rte_mbuf *igmp_mbuf;
488 uint8_t out[64] = {0};
491 task->igmp_address = 0;
492 ret = rte_mempool_get(task->igmp_pool, (void **)&igmp_mbuf);
494 plog_err("Unable to allocate igmp mbuf\n");
497 build_igmp_message(tbase, igmp_mbuf, task->igmp_address, IGMP_LEAVE_GROUP);
498 task->base.tx_pkt(&task->base, &igmp_mbuf, 1, out);
// Initialise the swap task from its configuration.
// tbase - task state to fill in (a struct task_swap)
// targ  - task arguments; targ->flags may be updated (TASK_ARG_SRC_MAC_SET)
// Steps visible here: store the configured destination / source MAC addresses,
// reject the "do not set mac" options, fall back to the MAC of the first tx port
// as source, copy the flags, store the IGMP and local IPv4 addresses after
// rte_cpu_to_be_32(), create the IGMP mempool if the task has none, derive the
// checksum offload setting from the reachable port and, when targ->store_max is
// non-zero, allocate the packet store and open its dump file.
// NOTE(review): several statements of this function are not visible in this
// listing (e.g. the end of the rte_mempool_create() argument list and the
// declaration of filename).
501 static void init_task_swap(struct task_base *tbase, struct task_args *targ)
503 struct task_swap *task = (struct task_swap *)tbase;
504 prox_rte_ether_addr *src_addr, *dst_addr;
507 * The destination MAC of the outgoing packet is based on the config file:
508 * - 'dst mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as dst mac
509 * - 'dst mac=packet' => the src mac of the incoming packet is used as dst mac
510 * - (default - no 'dst mac') => the src mac from the incoming packet is used as dst mac
512 * The source MAC of the outgoing packet is based on the config file:
513 * - 'src mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as src mac
514 * - 'src mac=packet' => the dst mac of the incoming packet is used as src mac
515 * - 'src mac=hw' => the mac address of the tx port is used as src mac
516 * An error is returned if there are no physical tx ports
517 * - (default - no 'src mac') => if there is physical tx port, the mac of that port is used as src mac
518 * - (default - no 'src mac') if there are no physical tx ports the dst mac of the incoming packet
// Configured destination MAC goes to bytes 0-5 of src_dst_mac.
// The size is taken from *src_addr although dst_addr is copied; both pointers
// have the same pointee type, so the size is the same
521 if (targ->flags & TASK_ARG_DST_MAC_SET) {
522 dst_addr = &targ->edaddr;
523 memcpy(&task->src_dst_mac[0], dst_addr, sizeof(*src_addr));
526 PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_SRC_MAC, "src mac must be set in swap mode, by definition => src mac=no is not supported\n");
527 PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_DST_MAC, "dst mac must be set in swap mode, by definition => dst mac=no is not supported\n");
// Configured source MAC goes to bytes 6-11 of src_dst_mac
529 if (targ->flags & TASK_ARG_SRC_MAC_SET) {
530 src_addr = &targ->esaddr;
531 memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
532 plog_info("\t\tCore %d: src mac set from config file\n", targ->lconf->id);
// 'src mac=hw' needs at least one tx port
534 if (targ->flags & TASK_ARG_HW_SRC_MAC)
535 PROX_PANIC(targ->nb_txports == 0, "src mac set to hw but no tx port\n");
// With a tx port available its MAC is used as source and the flag is raised so
// the run-time code treats the source MAC as configured
536 if (targ->nb_txports) {
537 src_addr = &prox_port_cfg[task->base.tx_params_hw.tx_port_queue[0].port].eth_addr;
538 memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
539 targ->flags |= TASK_ARG_SRC_MAC_SET;
540 plog_info("\t\tCore %d: src mac set from port\n", targ->lconf->id);
// Snapshot of the (possibly updated) flags for the data path
543 task->runtime_flags = targ->flags;
544 task->igmp_address = rte_cpu_to_be_32(targ->igmp_address);
// Create the IGMP mempool only when the task does not have one yet.
// NOTE(review): name is a static buffer shared by all calls; no change to it is
// visible in this listing - presumably each pool needs a distinct name, confirm.
545 if (task->igmp_pool == NULL) {
546 static char name[] = "igmp0_pool";
548 struct rte_mempool *ret = rte_mempool_create(name, NB_IGMP_MBUF, IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF,
549 sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
551 PROX_PANIC(ret == NULL, "Failed to allocate IGMP memory pool on socket %u with %u elements\n",
552 rte_socket_id(), NB_IGMP_MBUF);
553 plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_IGMP_MBUF,
554 IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF, rte_socket_id());
555 task->igmp_pool = ret;
557 task->local_ipv4 = rte_cpu_to_be_32(targ->local_ipv4);
// Checksum offload follows what was requested on the reachable port.
// NOTE(review): port is dereferenced below; a NULL check is not visible in this
// listing - confirm.
559 struct prox_port_cfg *port = find_reachable_port(targ);
561 task->offload_crc = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
// Optional packet store: store_max slots, dumped to a per-core / per-task file
// by stop_swap()
563 task->store_pkt_id = 0;
564 if (targ->store_max) {
// NOTE(review): unbounded sprintf; the declaration and size of filename are not
// visible in this listing
566 sprintf(filename, "swap_buf_%02d_%02d", targ->lconf->id, targ->task);
// store_max - 1 is used as an index mask, which presumably requires store_max to
// be a power of two - confirm
568 task->store_msk = targ->store_max - 1;
// NOTE(review): the malloc() result is not checked and no zeroing of the slots
// is visible, while stop_swap() reads the len field of every slot - confirm.
569 task->store_buf = (struct packet *)malloc(sizeof(struct packet) * targ->store_max);
570 task->fp = fopen(filename, "w+");
571 PROX_PANIC(task->fp == NULL, "Unable to open %s\n", filename);
// Registration record of the swap task, handed to reg_task() by reg_task_swap().
// NOTE(review): some members of the initializer are not visible in this listing.
577 static struct task_init task_init_swap = {
// Called to set up the task from its configuration
579 .init = init_task_swap,
// Called with each burst of received packets
580 .handle = handle_swap_bulk,
// Size of the per-task state
582 .size = sizeof(struct task_swap),
// Stop hook: frees the IGMP pool and dumps stored packets
583 .stop_last = stop_swap
586 __attribute__((constructor)) static void reg_task_swap(void)
588 reg_task(&task_init_swap);