Fix port throughput related statistics for i40e virtual functions
[samplevnf.git] / VNFs / DPPD-PROX / handle_swap.c
index 8e5a94c..3913101 100644 (file)
 #include "task_base.h"
 #include "lconf.h"
 #include "log.h"
-#include "arp.h"
-#include "handle_swap.h"
 #include "prox_port_cfg.h"
 #include "mpls.h"
 #include "qinq.h"
 #include "gre.h"
 #include "prefetch.h"
+#include "igmp.h"
+#include "prox_cksum.h"
 
 struct task_swap {
        struct task_base base;
-       uint8_t src_dst_mac[12];
+       struct rte_mempool *igmp_pool;  /* mbuf pool from which locally generated IGMP messages are taken */
        uint32_t runtime_flags;
-       uint32_t tmp_ip;
-       uint32_t ip;
+       uint32_t igmp_address;          /* multicast group registered on this task; 0 means no group is registered */
+       uint8_t src_dst_mac[12];        /* bytes 0-5: configured dst MAC, bytes 6-11: src MAC */
+       uint32_t local_ipv4;            /* IPv4 source address written into generated IGMP messages */
+       int offload_crc;                /* TX checksum offload bits of the reachable port, handed to prox_ip_udp_cksum() */
 };
 
-static void task_update_config(struct task_swap *task)
-{
-       if (unlikely(task->ip != task->tmp_ip))
-               task->ip = task->tmp_ip;
-}
+#define NB_IGMP_MBUF           1024
+#define IGMP_MBUF_SIZE                 2048
+#define NB_CACHE_IGMP_MBUF     256
 
 static void write_src_and_dst_mac(struct task_swap *task, struct rte_mbuf *mbuf)
 {
@@ -71,33 +71,46 @@ static void write_src_and_dst_mac(struct task_swap *task, struct rte_mbuf *mbuf)
                }
        }
 }
-static inline int handle_arp_request(struct task_swap *task, struct ether_hdr_arp *hdr_arp, struct ether_addr *s_addr, uint32_t ip)
+
+/*
+ * Derive the Ethernet multicast MAC address that corresponds to an IPv4
+ * multicast group: 01:00:5e followed by the 23 least significant bits of
+ * the group address.
+ *
+ * ip:      IPv4 group address. The bit layout used below assumes the value is
+ *          in network byte order on a little-endian host - TODO confirm for
+ *          other targets.
+ * dst_mac: receives the first sizeof(struct ether_addr) bytes of the result.
+ */
+static inline void build_mcast_mac(uint32_t ip, struct ether_addr *dst_mac)
 {
-       if ((hdr_arp->arp.data.tpa == ip) || (ip == 0)) {
-               prepare_arp_reply(hdr_arp, s_addr);
-               memcpy(hdr_arp->ether_hdr.d_addr.addr_bytes, hdr_arp->ether_hdr.s_addr.addr_bytes, 6);
-               memcpy(hdr_arp->ether_hdr.s_addr.addr_bytes, s_addr, 6);
-               return 0;
-       } else if (task->runtime_flags & TASK_MULTIPLE_MAC) {
-               struct ether_addr tmp_s_addr;
-               create_mac(hdr_arp, &tmp_s_addr);
-               prepare_arp_reply(hdr_arp, &tmp_s_addr);
-               memcpy(hdr_arp->ether_hdr.d_addr.addr_bytes, hdr_arp->ether_hdr.s_addr.addr_bytes, 6);
-               memcpy(hdr_arp->ether_hdr.s_addr.addr_bytes, &tmp_s_addr, 6);
-               return 0;
-       } else {
-               plogx_dbg("Received ARP on unexpected IP %x, expecting %x\n", rte_be_to_cpu_32(hdr_arp->arp.data.tpa), rte_be_to_cpu_32(ip));
-               return OUT_DISCARD;
-       }
+       // MAC address is 01:00:5e followed by 23 LSB of IP address.
+       // Widen to 64 bits before shifting: with a 32-bit 'long' the shifted
+       // address bits would otherwise be truncated away.
+       uint64_t mac = 0x0000005e0001ULL | (((uint64_t)ip & 0xFFFF7F00ULL) << 16);
+       memcpy(dst_mac, &mac, sizeof(struct ether_addr));
 }
 
-/*
- * swap mode does not send arp requests, so does not expect arp replies
- * Need to understand later whether we must send arp requests
- */
-static inline int handle_arp_replies(struct task_swap *task, struct ether_hdr_arp *hdr_arp)
+/*
+ * Overwrite the packet held by mbuf with an IGMPv2 message.
+ *
+ * tbase:        swap task; provides the source MAC (src_dst_mac[6..11]), the
+ *               source IPv4 address (local_ipv4) and the checksum offload flags.
+ * mbuf:         buffer to fill; its length is forced to 46 bytes, single segment.
+ * ip:           group address, used as IPv4 destination, as IGMP group address
+ *               and to derive the destination multicast MAC.
+ * igmp_message: value written into the IGMP type field.
+ */
+static inline void build_igmp_message(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t ip, uint8_t igmp_message)
 {
-       return OUT_DISCARD;
+       struct task_swap *task = (struct task_swap *)tbase;
+       struct ether_hdr *eth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+       struct ipv4_hdr *ipv4 = (struct ipv4_hdr *)(eth + 1);
+       struct igmpv2_hdr *igmp = (struct igmpv2_hdr *)(ipv4 + 1);
+       struct ether_addr mcast_mac;
+
+       // Fixed-size frame carried in one segment
+       rte_pktmbuf_data_len(mbuf) = 46;
+       rte_pktmbuf_pkt_len(mbuf) = 46;
+       init_mbuf_seg(mbuf);
+
+       // Ethernet header: multicast destination derived from the group, our own source MAC
+       build_mcast_mac(ip, &mcast_mac);
+       ether_addr_copy(&mcast_mac, &eth->d_addr);
+       ether_addr_copy((struct ether_addr *)&task->src_dst_mac[6], &eth->s_addr);
+       eth->ether_type = ETYPE_IPv4;
+
+       // IPv4 header without options (IHL = 5), TTL 1
+       // NOTE(review): total length 32 minus the 20 byte header leaves 12 payload
+       // bytes; confirm this matches sizeof(struct igmpv2_hdr).
+       ipv4->version_ihl = 0x45;
+       ipv4->type_of_service = 0;
+       ipv4->total_length = rte_cpu_to_be_16(32);
+       ipv4->packet_id = 0;
+       ipv4->fragment_offset = 0;
+       ipv4->time_to_live = 1;
+       ipv4->next_proto_id = IPPROTO_IGMP;
+       ipv4->hdr_checksum = 0;
+       ipv4->src_addr = task->local_ipv4;
+       ipv4->dst_addr = ip;
+
+       // IGMP payload; checksum fields are zeroed before prox_ip_udp_cksum() runs
+       igmp->type = igmp_message;
+       igmp->max_resp_time = 0;
+       igmp->checksum = 0;
+       igmp->group_address = ip;
+
+       prox_ip_udp_cksum(mbuf, ipv4, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), task->offload_crc);
 }
 
 static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
@@ -107,6 +120,8 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui
        struct ether_addr mac;
        struct ipv4_hdr *ip_hdr;
        struct udp_hdr *udp_hdr;
+       struct gre_hdr *pgre;
+       struct ipv4_hdr *inner_ip_hdr;
        uint32_t ip;
        uint16_t port;
        uint8_t out[64] = {0};
@@ -114,8 +129,9 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui
        uint32_t mpls_len = 0;
        struct qinq_hdr *qinq;
        struct vlan_hdr *vlan;
-       struct ether_hdr_arp *hdr_arp;
        uint16_t j;
+       struct igmpv2_hdr *pigmp;
+       uint8_t type;
 
        for (j = 0; j < n_pkts; ++j) {
                PREFETCH0(mbufs[j]);
@@ -124,26 +140,14 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui
                PREFETCH0(rte_pktmbuf_mtod(mbufs[j], void *));
        }
 
+       // TODO 1: check packet is long enough for Ethernet + IP + UDP = 42 bytes
        for (uint16_t j = 0; j < n_pkts; ++j) {
                hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr *);
                switch (hdr->ether_type) {
-               case ETYPE_ARP:
-                       hdr_arp = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *);
-                       if (arp_is_gratuitous(hdr_arp)) {
-                               plog_info("Received gratuitous packet \n");
-                               out[j] = OUT_DISCARD;
-                       } else if (hdr_arp->arp.oper == ARP_REQUEST) {
-                               out[j] = handle_arp_request(task, hdr_arp, (struct ether_addr *)&task->src_dst_mac[6], task->ip);
-                       } else if (hdr_arp->arp.oper == ARP_REPLY) {
-                               out[j] = handle_arp_replies(task, hdr_arp);
-                       } else {
-                               plog_info("Received unexpected ARP operation %d\n", hdr_arp->arp.oper);
-                               out[j] = OUT_DISCARD;
-                       }
-                       continue;
                case ETYPE_MPLSU:
                        mpls = (struct mpls_hdr *)(hdr + 1);
                        while (!(mpls->bytes & 0x00010000)) {
+                               // TODO: verify packet length
                                mpls++;
                                mpls_len += sizeof(struct mpls_hdr);
                        }
@@ -199,48 +203,129 @@ static int handle_swap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, ui
                        out[j] = OUT_DISCARD;
                        continue;
                }
-               udp_hdr = (struct udp_hdr *)(ip_hdr + 1);
+               // TODO 2 : check packet is long enough for Ethernet + IP + UDP + extra header (VLAN, MPLS, ...)
                ip = ip_hdr->dst_addr;
-               ip_hdr->dst_addr = ip_hdr->src_addr;
-               ip_hdr->src_addr = ip;
-               if (ip_hdr->next_proto_id == IPPROTO_GRE) {
-                       struct gre_hdr *pgre = (struct gre_hdr *)(ip_hdr + 1);
-                       struct ipv4_hdr *inner_ip_hdr = ((struct ipv4_hdr *)(pgre + 1));
+
+               switch (ip_hdr->next_proto_id) {
+               case IPPROTO_GRE:
+                       ip_hdr->dst_addr = ip_hdr->src_addr;
+                       ip_hdr->src_addr = ip;
+
+                       pgre = (struct gre_hdr *)(ip_hdr + 1);
+                       inner_ip_hdr = ((struct ipv4_hdr *)(pgre + 1));
                        ip = inner_ip_hdr->dst_addr;
                        inner_ip_hdr->dst_addr = inner_ip_hdr->src_addr;
                        inner_ip_hdr->src_addr = ip;
+
                        udp_hdr = (struct udp_hdr *)(inner_ip_hdr + 1);
+                       // TODO 3.1 : verify proto is UDP or TCP
                        port = udp_hdr->dst_port;
                        udp_hdr->dst_port = udp_hdr->src_port;
                        udp_hdr->src_port = port;
-               } else {
+                       write_src_and_dst_mac(task, mbufs[j]);
+                       break;
+               case IPPROTO_UDP:
+               case IPPROTO_TCP:
+                       if (task->igmp_address && IS_IPV4_MCAST(rte_be_to_cpu_32(ip))) {
+                               out[j] = OUT_DISCARD;
+                               continue;
+                       }
+                       udp_hdr = (struct udp_hdr *)(ip_hdr + 1);
+                       ip_hdr->dst_addr = ip_hdr->src_addr;
+                       ip_hdr->src_addr = ip;
+
                        port = udp_hdr->dst_port;
                        udp_hdr->dst_port = udp_hdr->src_port;
                        udp_hdr->src_port = port;
+                       write_src_and_dst_mac(task, mbufs[j]);
+                       break;
+               case IPPROTO_IGMP:
+                       pigmp = (struct igmpv2_hdr *)(ip_hdr + 1);
+                       // TODO: check packet len
+                       type = pigmp->type;
+                       if (type == IGMP_MEMBERSHIP_QUERY) {
+                               if (task->igmp_address) {
+                                       // We have an address registered
+                                       if ((task->igmp_address == pigmp->group_address) || (pigmp->group_address == 0)) {
+                                               // We get a request for the registered address, or to 0.0.0.0
+                                               build_igmp_message(tbase, mbufs[j], task->igmp_address, IGMP_MEMBERSHIP_REPORT);        // replace Membership query packet with a response
+                                       } else {
+                                               // Discard as this is a query for a different group than the registered one
+                                               out[j] = OUT_DISCARD;
+                                               continue;
+                                       }
+                               } else {
+                                       // Discard as we are not registered to any group
+                                       out[j] = OUT_DISCARD;
+                                       continue;
+                               }
+                       } else {
+                               // Do not forward other IGMP packets back
+                               out[j] = OUT_DISCARD;
+                               continue;
+                       }
+                       break;
+               default:
+                       plog_warn("Unsupported IP protocol 0x%x\n", ip_hdr->next_proto_id);
+                       out[j] = OUT_DISCARD;
+                       continue;
                }
-               write_src_and_dst_mac(task, mbufs[j]);
        }
-       task_update_config(task);
        return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
 }
 
+/*
+ * Register igmp_address as the multicast group of this swap task and send an
+ * IGMP membership report for it.
+ *
+ * The group is recorded before the mbuf is requested, so it stays registered
+ * even when no mbuf is available; in that case an error is logged and no
+ * report is transmitted.
+ */
+void igmp_join_group(struct task_base *tbase, uint32_t igmp_address)
+{
+       struct task_swap *task = (struct task_swap *)tbase;
+       struct rte_mbuf *report;
+       uint8_t out[64] = {0};
+
+       task->igmp_address = igmp_address;
+
+       if (rte_mempool_get(task->igmp_pool, (void **)&report) != 0) {
+               plog_err("Unable to allocate igmp mbuf\n");
+               return;
+       }
+
+       build_igmp_message(tbase, report, igmp_address, IGMP_MEMBERSHIP_REPORT);
+       task->base.tx_pkt(&task->base, &report, 1, out);
+}
+
+/*
+ * Leave the multicast group currently registered on this swap task.
+ *
+ * The registered group is cleared, so membership queries handled afterwards
+ * are no longer answered, and an IGMP Leave Group message is sent for the
+ * group that was registered. If no mbuf can be taken from the IGMP pool an
+ * error is logged and nothing is sent; the group stays cleared in that case.
+ */
+void igmp_leave_group(struct task_base *tbase)
+{
+       struct task_swap *task = (struct task_swap *)tbase;
+       struct rte_mbuf *igmp_mbuf;
+       uint8_t out[64] = {0};
+       int ret;
+
+       // Remember the group before clearing it: the leave message has to name
+       // the group being left, not 0.0.0.0.
+       uint32_t left_group = task->igmp_address;
+
+       task->igmp_address = 0;
+       ret = rte_mempool_get(task->igmp_pool, (void **)&igmp_mbuf);
+       if (ret != 0) {
+               plog_err("Unable to allocate igmp mbuf\n");
+               return;
+       }
+       // NOTE(review): build_igmp_message() also uses the group as IPv4 destination;
+       // RFC 2236 addresses Leave Group to all-routers (224.0.0.2) - confirm whether
+       // that matters for the routers this is used with.
+       build_igmp_message(tbase, igmp_mbuf, left_group, IGMP_LEAVE_GROUP);
+       task->base.tx_pkt(&task->base, &igmp_mbuf, 1, out);
+}
+
 static void init_task_swap(struct task_base *tbase, struct task_args *targ)
 {
        struct task_swap *task = (struct task_swap *)tbase;
        struct ether_addr *src_addr, *dst_addr;
 
        /*
-        * Destination MAC can come from
-        *    - pre-configured mac in case 'dst mac=xx:xx:xx:xx:xx:xx' in config file
-        *    - src mac from the packet in case 'dst mac=packet' in config file
-        *    - not written in case 'dst mac=no' in config file
-        *    - (default - no 'dst mac') src mac from the packet
-        * Source MAC can come from
-        *    - pre-configured mac in case 'src mac=xx:xx:xx:xx:xx:xx' in config file
-        *    - dst mac from the packet in case 'src mac=packet' in config file
-        *    - not written in case 'src mac=no' in config file
-        *    - (default - no 'src mac') if (tx_port) port mac
-        *    - (default - no 'src mac') if (no tx_port) dst mac from the packet
+        * The destination MAC of the outgoing packet is based on the config file:
+        *    - 'dst mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as dst mac
+        *    - 'dst mac=packet'            => the src mac of the incoming packet is used as dst mac
+        *    - (default - no 'dst mac')    => the src mac from the incoming packet is used as dst mac
+        *
+        * The source MAC of the outgoing packet is based on the config file:
+        *    - 'src mac=xx:xx:xx:xx:xx:xx' => the pre-configured mac will be used as src mac
+        *    - 'src mac=packet'            => the dst mac of the incoming packet is used as src mac
+        *    - 'src mac=hw'                => the mac address of the tx port is used as src mac
+        *                                     An error is returned if there are no physical tx ports
+        *    - (default - no 'src mac')    => if there is a physical tx port, the mac of that port is used as src mac
+        *    - (default - no 'src mac')    => if there are no physical tx ports, the dst mac of the incoming packet is used as src mac
         */
 
        if (targ->flags & TASK_ARG_DST_MAC_SET) {
@@ -248,44 +333,54 @@ static void init_task_swap(struct task_base *tbase, struct task_args *targ)
                memcpy(&task->src_dst_mac[0], dst_addr, sizeof(*src_addr));
        }
 
+       PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_SRC_MAC, "src mac must be set in swap mode, by definition => src mac=no is not supported\n");
+       PROX_PANIC(targ->flags & TASK_ARG_DO_NOT_SET_DST_MAC, "dst mac must be set in swap mode, by definition => dst mac=no is not supported\n");
+
        if (targ->flags & TASK_ARG_SRC_MAC_SET) {
                src_addr =  &targ->esaddr;
                memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
                plog_info("\t\tCore %d: src mac set from config file\n", targ->lconf->id);
-       } else if (targ->nb_txports) {
-               src_addr = &prox_port_cfg[task->base.tx_params_hw.tx_port_queue[0].port].eth_addr;
-               memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
-               if (targ->flags & TASK_ARG_HW_SRC_MAC){
+       } else {
+               if (targ->flags & TASK_ARG_HW_SRC_MAC)
+                       PROX_PANIC(targ->nb_txports == 0, "src mac set to hw but no tx port\n");
+               if (targ->nb_txports) {
+                       src_addr = &prox_port_cfg[task->base.tx_params_hw.tx_port_queue[0].port].eth_addr;
+                       memcpy(&task->src_dst_mac[6], src_addr, sizeof(*dst_addr));
                        targ->flags |= TASK_ARG_SRC_MAC_SET;
                        plog_info("\t\tCore %d: src mac set from port\n", targ->lconf->id);
                }
        }
        task->runtime_flags = targ->flags;
-       task->ip = rte_cpu_to_be_32(targ->local_ipv4);
-       task->tmp_ip = task->ip;
+       task->igmp_address =  rte_cpu_to_be_32(targ->igmp_address);
+       if (task->igmp_pool == NULL) {
+               static char name[] = "igmp0_pool";
+               name[4]++;
+               struct rte_mempool *ret = rte_mempool_create(name, NB_IGMP_MBUF, IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF,
+                       sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
+                       rte_socket_id(), 0);
+               PROX_PANIC(ret == NULL, "Failed to allocate IGMP memory pool on socket %u with %u elements\n",
+                       rte_socket_id(), NB_IGMP_MBUF);
+               plog_info("\t\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_IGMP_MBUF,
+                       IGMP_MBUF_SIZE, NB_CACHE_IGMP_MBUF, rte_socket_id());
+               task->igmp_pool = ret;
+       }
+       task->local_ipv4 = rte_cpu_to_be_32(targ->local_ipv4);
+
+       struct prox_port_cfg *port = find_reachable_port(targ);
+       if (port) {
+               task->offload_crc = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
+       }
 }
 
 static struct task_init task_init_swap = {
        .mode_str = "swap",
        .init = init_task_swap,
        .handle = handle_swap_bulk,
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
-       .size = sizeof(struct task_swap),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
-};
-
-static struct task_init task_init_swap_arp = {
-       .mode_str = "swap",
-       .sub_mode_str = "l3",
-       .init = init_task_swap,
-       .handle = handle_swap_bulk,
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = 0,
        .size = sizeof(struct task_swap),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_swap(void)
 {
        reg_task(&task_init_swap);
-       reg_task(&task_init_swap_arp);
 }