Preparation for packet mis-ordering stats
[samplevnf.git] / VNFs / DPPD-PROX / handle_gen.c
index e5e43fc..6a517c3 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2010-2017 Intel Corporation
+// Copyright (c) 2010-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 */
-
 #include <rte_mbuf.h>
 #include <pcap.h>
 #include <string.h>
@@ -22,6 +21,8 @@
 #include <rte_version.h>
 #include <rte_byteorder.h>
 #include <rte_ether.h>
+#include <rte_hash_crc.h>
+#include <rte_malloc.h>
 
 #include "prox_shared.h"
 #include "random.h"
 #include "local_mbuf.h"
 #include "arp.h"
 #include "tx_pkt.h"
-#include <rte_hash_crc.h>
+#include "handle_master.h"
+#include "defines.h"
+#include "prox_ipv6.h"
 
 struct pkt_template {
-       uint64_t dst_mac;
-       uint32_t ip_src;
-       uint32_t ip_dst_pos;
        uint16_t len;
        uint16_t l2_len;
        uint16_t l3_len;
-       uint8_t  buf[ETHER_MAX_LEN];
+       uint8_t  *buf;
 };
 
-#define FLAG_DST_MAC_KNOWN     1
-#define FLAG_L3_GEN            2
-#define FLAG_RANDOM_IPS                4
+#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
 
-#define MAX_TEMPLATE_INDEX     65536
-#define TEMPLATE_INDEX_MASK    (MAX_TEMPLATE_INDEX - 1)
-#define MBUF_ARP               MAX_TEMPLATE_INDEX
+#define DO_PANIC       1
+#define DO_NOT_PANIC   0
 
-#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
+#define FROM_PCAP      1
+#define NOT_FROM_PCAP  0
+
+#define TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC 1
 
 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
 {
@@ -87,12 +87,12 @@ struct task_gen_pcap {
        uint32_t n_pkts;
        uint64_t last_tsc;
        uint64_t *proto_tsc;
+       uint32_t socket_id;
 };
 
 struct task_gen {
        struct task_base base;
        uint64_t hz;
-       uint64_t link_speed;
        struct token_time token_time;
        struct local_mbuf local_mbuf;
        struct pkt_template *pkt_template; /* packet templates used at runtime */
@@ -101,14 +101,17 @@ struct task_gen {
        uint64_t new_rate_bps;
        uint64_t pkt_queue_index;
        uint32_t n_pkts; /* number of packets in pcap */
+       uint32_t orig_n_pkts; /* number of packets in pcap */
        uint32_t pkt_idx; /* current packet from pcap */
        uint32_t pkt_count; /* how many pakets to generate */
+       uint32_t max_frame_size;
        uint32_t runtime_flags;
        uint16_t lat_pos;
        uint16_t packet_id_pos;
        uint16_t accur_pos;
        uint16_t sig_pos;
        uint32_t sig;
+       uint32_t socket_id;
        uint8_t generator_id;
        uint8_t n_rands; /* number of randoms */
        uint8_t min_bulk_size;
@@ -122,24 +125,30 @@ struct task_gen {
                uint16_t rand_offset; /* each random has an offset*/
                uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
        } rand[64];
-       uint64_t accur[64];
+       uint64_t accur[ACCURACY_WINDOW];
        uint64_t pkt_tsc_offset[64];
        struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
-       struct ether_addr gw_mac;
-       struct ether_addr  src_mac;
-       struct rte_hash  *mac_hash;
-       uint64_t *dst_mac;
-       uint32_t gw_ip;
-       uint32_t src_ip;
+       prox_rte_ether_addr  src_mac;
        uint8_t flags;
        uint8_t cksum_offload;
+       struct prox_port_cfg *port;
+       uint64_t *bytes_to_tsc;
+       uint32_t imix_pkt_sizes[MAX_IMIX_PKTS];
+       uint32_t imix_nb_pkts;
+       uint32_t new_imix_nb_pkts;
 } __rte_cache_aligned;
 
-static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
+static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes);
+static void task_gen_reset_pkt_templates_content(struct task_gen *task);
+static void task_gen_pkt_template_recalc_metadata(struct task_gen *task);
+static int check_all_pkt_size(struct task_gen *task, int do_panic);
+static int check_all_fields_in_bounds(struct task_gen *task, int do_panic);
+
+static inline uint8_t ipv4_get_hdr_len(prox_rte_ipv4_hdr *ip)
 {
        /* Optimize for common case of IPv4 header without options. */
        if (ip->version_ihl == 0x45)
-               return sizeof(struct ipv4_hdr);
+               return sizeof(prox_rte_ipv4_hdr);
        if (unlikely(ip->version_ihl >> 4 != 4)) {
                plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
                return 0;
@@ -149,16 +158,16 @@ static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
 
 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
 {
-       *l2_len = sizeof(struct ether_hdr);
+       *l2_len = sizeof(prox_rte_ether_hdr);
        *l3_len = 0;
-       struct vlan_hdr *vlan_hdr;
-       struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
-       struct ipv4_hdr *ip;
+       prox_rte_vlan_hdr *vlan_hdr;
+       prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
+       prox_rte_ipv4_hdr *ip;
        uint16_t ether_type = eth_hdr->ether_type;
 
        // Unstack VLAN tags
-       while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(struct vlan_hdr) < len)) {
-               vlan_hdr = (struct vlan_hdr *)(pkt + *l2_len);
+       while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(prox_rte_vlan_hdr) < len)) {
+               vlan_hdr = (prox_rte_vlan_hdr *)(pkt + *l2_len);
                *l2_len +=4;
                ether_type = vlan_hdr->eth_proto;
        }
@@ -171,11 +180,11 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui
        case ETYPE_MPLSM:
                *l2_len +=4;
                break;
+       case ETYPE_IPv6:
        case ETYPE_IPv4:
                break;
        case ETYPE_EoGRE:
        case ETYPE_ARP:
-       case ETYPE_IPv6:
                *l2_len = 0;
                break;
        default:
@@ -185,8 +194,9 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui
        }
 
        if (*l2_len) {
-               struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + *l2_len);
-               *l3_len = ipv4_get_hdr_len(ip);
+               prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len);
+               if (ip->version_ihl >> 4 == 4)
+                       *l3_len = ipv4_get_hdr_len(ip);
        }
 }
 
@@ -195,9 +205,20 @@ static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_temp
        uint16_t l2_len = pkt_template->l2_len;
        uint16_t l3_len = pkt_template->l3_len;
 
-       if (l2_len) {
-               struct ipv4_hdr *ip = (struct ipv4_hdr*)(hdr + l2_len);
+       prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len);
+       if (l3_len) {
                prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
+       } else if (ip->version_ihl >> 4 == 6) {
+               prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(hdr + l2_len);
+               if (ip6->proto == IPPROTO_UDP) {
+                       prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
+                       udp->dgram_cksum = 0;
+                       udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
+               } else if (ip6->proto == IPPROTO_TCP) {
+                       prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
+                       tcp->cksum = 0;
+                       tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
+               }
        }
 }
 
@@ -207,22 +228,6 @@ static void task_gen_reset_token_time(struct task_gen *task)
        token_time_reset(&task->token_time, rte_rdtsc(), 0);
 }
 
-static void start(struct task_base *tbase)
-{
-       struct task_gen *task = (struct task_gen *)tbase;
-       task->pkt_queue_index = 0;
-
-       task_gen_reset_token_time(task);
-}
-
-static void start_pcap(struct task_base *tbase)
-{
-       struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
-       /* When we start, the first packet is sent immediately. */
-       task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
-       task->pkt_idx = 0;
-}
-
 static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk)
 {
        if (task->pkt_count == (uint32_t)-1)
@@ -287,20 +292,14 @@ static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf,
        return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
 }
 
-static uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
+static inline uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
 {
-       const uint64_t hz = task->hz;
-       const uint64_t bytes_per_hz = task->link_speed;
-
-       if (bytes_per_hz == UINT64_MAX)
-               return 0;
-
-       return hz * bytes / bytes_per_hz;
+       return task->bytes_to_tsc[bytes];
 }
 
 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
 {
-       return pkt_idx + 1 == task->n_pkts? 0 : pkt_idx + 1;
+       return pkt_idx + 1 >= task->n_pkts? 0 : pkt_idx + 1;
 }
 
 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
@@ -337,13 +336,7 @@ static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *t
         */
        for (uint16_t j = 0; j < max_bulk; ++j) {
                struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp];
-               if (unlikely((task->flags & (FLAG_L3_GEN | FLAG_DST_MAC_KNOWN)) == FLAG_L3_GEN))  {
-                       // Generator is supposed to get MAC address - MAC is still unknown for this template
-                       // generate ARP Request to gateway instead of the intended packet
-                       pkt_size = 60;
-               } else {
-                       pkt_size = pktpl->len;
-               }
+               pkt_size = pktpl->len;
                uint32_t pkt_len = pkt_len_to_wire_size(pkt_size);
                if (pkt_len + would_send_bytes > task->token_time.bytes_now)
                        break;
@@ -360,106 +353,6 @@ static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *t
        return send_bulk;
 }
 
-static inline void create_arp(struct rte_mbuf *mbuf, uint8_t *pkt_hdr, uint64_t *src_mac, uint32_t ip_dst, uint32_t ip_src)
-{
-       uint64_t mac_bcast = 0xFFFFFFFFFFFF;
-       rte_pktmbuf_pkt_len(mbuf) = 42;
-       rte_pktmbuf_data_len(mbuf) = 42;
-       init_mbuf_seg(mbuf);
-       struct ether_hdr_arp *hdr_arp = (struct ether_hdr_arp *)pkt_hdr;
-
-       memcpy(&hdr_arp->ether_hdr.d_addr.addr_bytes, &mac_bcast, 6);
-       memcpy(&hdr_arp->ether_hdr.s_addr.addr_bytes, src_mac, 6);
-       hdr_arp->ether_hdr.ether_type = ETYPE_ARP;
-       hdr_arp->arp.htype = 0x100,
-       hdr_arp->arp.ptype = 0x0008;
-       hdr_arp->arp.hlen = 6;
-       hdr_arp->arp.plen = 4;
-       hdr_arp->arp.oper = 0x100;
-       hdr_arp->arp.data.spa = ip_src;
-       hdr_arp->arp.data.tpa = ip_dst;
-       memset(&hdr_arp->arp.data.tha, 0, sizeof(struct ether_addr));
-       memcpy(&hdr_arp->arp.data.sha, src_mac, sizeof(struct ether_addr));
-}
-
-static int task_gen_write_dst_mac(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
-{
-       uint32_t ip_dst_pos, ip_src_pos, ip_dst, ip_src;
-       uint16_t i;
-       int ret;
-
-       if (task->flags & FLAG_L3_GEN) {
-               if (task->gw_ip) {
-                       if (unlikely((task->flags & FLAG_DST_MAC_KNOWN) == 0))  {
-                               for (i = 0; i < count; ++i) {
-                                       struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK];
-                                       create_arp(mbufs[i], pkt_hdr[i], (uint64_t *)&pktpl->buf[6], task->gw_ip, pktpl->ip_src);
-                                       mbufs[i]->udata64 |= MBUF_ARP;
-                               }
-                       } else {
-                               for (i = 0; i < count; ++i) {
-                                       struct ether_hdr *hdr = (struct ether_hdr *)pkt_hdr[i];
-                                       memcpy(&hdr->d_addr.addr_bytes, &task->gw_mac, 6);
-                               }
-                       }
-               } else if (unlikely((task->flags & FLAG_RANDOM_IPS) != 0) || (task->n_pkts >= 4)){
-                       // Find mac in lookup table. Send ARP if not found
-                       int32_t positions[MAX_PKT_BURST], idx;
-                       void *keys[MAX_PKT_BURST];
-                       uint32_t key[MAX_PKT_BURST];
-                       for (i = 0; i < count; ++i) {
-                               uint8_t *hdr = (uint8_t *)pkt_hdr[i];
-                               struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK];
-                               ip_dst_pos = pktpl->ip_dst_pos;
-                               ip_dst = *(uint32_t *)(hdr + ip_dst_pos);
-                               key[i] = ip_dst;
-                               keys[i] = &key[i];
-                       }
-                       ret = rte_hash_lookup_bulk(task->mac_hash, (const void **)&keys, count, positions);
-                       if (unlikely(ret < 0)) {
-                               plogx_err("lookup_bulk failed in mac_hash\n");
-                               tx_pkt_drop_all((struct task_base *)task, mbufs, count, NULL);
-                               return -1;
-                       }
-                       for (i = 0; i < count; ++i) {
-                               idx = positions[i];
-                               if (unlikely(idx < 0)) {
-                                       // mac not found for this IP
-                                       struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK];
-                                       uint8_t *hdr = (uint8_t *)pkt_hdr[i];
-                                       ip_src_pos = pktpl->ip_dst_pos - 4;
-                                       ip_src = *(uint32_t *)(hdr + ip_src_pos);
-                                       create_arp(mbufs[i], pkt_hdr[i], (uint64_t *)&hdr[6], key[i], ip_src);
-                                       mbufs[i]->udata64 |= MBUF_ARP;
-                               } else {
-                                       // mac found for this IP
-                                       struct ether_hdr_arp *hdr_arp = (struct ether_hdr_arp *)pkt_hdr[i];
-                                       memcpy(&hdr_arp->ether_hdr.d_addr.addr_bytes, &task->dst_mac[idx], 6);
-                               }
-                       }
-               } else {
-                       for (i = 0; i < count; ++i) {
-                               uint8_t *hdr = (uint8_t *)pkt_hdr[i];
-                               struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK];
-
-                               // Check if packet template already has the mac
-                               if (unlikely(pktpl->dst_mac == 0)) {
-                                       // no random_ip, can take from from packet template but no mac (yet)
-                                       uint32_t ip_dst_pos = pktpl->ip_dst_pos;
-                                       ip_dst = *(uint32_t *)(hdr + ip_dst_pos);
-                                       create_arp(mbufs[i], pkt_hdr[i], (uint64_t *)&pktpl->buf[6], ip_dst, pktpl->ip_src);
-                                       mbufs[i]->udata64 |= MBUF_ARP;
-                               } else {
-                                       // no random ip, mac known
-                                       struct ether_hdr_arp *hdr_arp = (struct ether_hdr_arp *)pkt_hdr[i];
-                                       memcpy(&hdr_arp->ether_hdr.d_addr.addr_bytes, &pktpl->dst_mac, 6);
-                               }
-                       }
-               }
-       }
-       return 0;
-}
-
 static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr)
 {
        uint32_t ret, ret_tmp;
@@ -491,9 +384,10 @@ static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, ui
        *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
 }
 
-static void task_gen_apply_sig(struct task_gen *task, uint8_t *pkt_hdr)
+static void task_gen_apply_sig(struct task_gen *task, struct pkt_template *dst)
 {
-       *(uint32_t *)(pkt_hdr + task->sig_pos) = task->sig;
+       if (task->sig_pos)
+               *(uint32_t *)(dst->buf + task->sig_pos) = task->sig;
 }
 
 static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
@@ -501,26 +395,12 @@ static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf
        if (!task->accur_pos)
                return;
 
-       /* The accuracy of task->pkt_queue_index - 64 is stored in
-          packet task->pkt_queue_index. The ID modulo 64 is the
+       /* The accuracy of task->pkt_queue_index - ACCURACY_WINDOW is stored in
+          packet task->pkt_queue_index. The ID modulo ACCURACY_WINDOW is the
           same. */
        for (uint16_t j = 0; j < count; ++j) {
-               if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
-                       uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & 63];
-                       task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
-               }
-       }
-}
-
-static void task_gen_apply_all_sig(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
-{
-       if (!task->sig_pos)
-               return;
-
-       for (uint16_t j = 0; j < count; ++j) {
-               if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
-                       task_gen_apply_sig(task, pkt_hdr[j]);
-               }
+               uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & (ACCURACY_WINDOW - 1)];
+               task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
        }
 }
 
@@ -537,11 +417,9 @@ static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf
                return;
 
        for (uint16_t i = 0; i < count; ++i) {
-               if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
-                       struct unique_id id;
-                       unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
-                       task_gen_apply_unique_id(task, pkt_hdr[i], &id);
-               }
+               struct unique_id id;
+               unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
+               task_gen_apply_unique_id(task, pkt_hdr[i], &id);
        }
 }
 
@@ -555,11 +433,9 @@ static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **m
 
        uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
        for (uint16_t i = 0; i < count; ++i) {
-               if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
-                       struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
-                       checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
-                       pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
-               }
+               struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
+               checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
+               pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
        }
 }
 
@@ -579,8 +455,12 @@ static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t coun
        uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
        struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
        uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
+#ifdef NO_EXTRAPOLATION
+       uint64_t bulk_duration = task->pkt_tsc_offset[count - 1];
+#else
        uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
        uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
+#endif
 
        return bulk_duration;
 }
@@ -615,6 +495,14 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr,
           simply sleeping until delta_t is zero would leave a period
           of silence on the line. The error has been introduced
           earlier, but the packets have already been sent. */
+
+       /* This typically happens if the previous bulk was delayed
+          by an interrupt, e.g. (with time in nsec):
+          Time x: sleep 4 microsec
+          Time x+4000: send 64 packets (64 packets take 4000 nsec, w/ 10Gbps 64 bytes)
+          Time x+5000: send 16 packets (16 packets take 1000 nsec)
+          When we send the 16 packets, the 64 earlier packets are not yet
+          fully sent */
        if (tx_tsc < task->earliest_tsc_next_pkt)
                delta_t = task->earliest_tsc_next_pkt - tx_tsc;
        else
@@ -623,12 +511,10 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr,
        for (uint16_t i = 0; i < count; ++i) {
                uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
                const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
-
                *pos = pkt_tsc >> LATENCY_ACCURACY;
        }
 
        uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
-
        task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
        write_tsc_after = rte_rdtsc();
        task->write_duration_estimate = write_tsc_after - write_tsc_before;
@@ -638,6 +524,7 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr,
        do {
                tsc_before_tx = rte_rdtsc();
        } while (tsc_before_tx < tx_tsc);
+
        return tsc_before_tx;
 }
 
@@ -650,7 +537,7 @@ static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint6
        uint64_t first_accuracy_idx = task->pkt_queue_index - count;
 
        for (uint32_t i = 0; i < count; ++i) {
-               uint32_t accuracy_idx = (first_accuracy_idx + i) & 63;
+               uint32_t accuracy_idx = (first_accuracy_idx + i) & (ACCURACY_WINDOW - 1);
 
                task->accur[accuracy_idx] = accur;
        }
@@ -674,108 +561,361 @@ static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbuf
                struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
                struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
                pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
-               mbufs[i]->udata64 = task->pkt_idx & TEMPLATE_INDEX_MASK;
-               struct ether_hdr *hdr = (struct ether_hdr *)pkt_hdr[i];
+               prox_rte_ether_hdr *hdr = (prox_rte_ether_hdr *)pkt_hdr[i];
                if (task->lat_enabled) {
+#ifdef NO_EXTRAPOLATION
+                       task->pkt_tsc_offset[i] = 0;
+#else
                        task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
+#endif
                        will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
                }
                task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
        }
 }
 
+/* Allocate the two template arrays of a generator task plus one frame buffer
+ * per template.
+ *
+ * task->pkt_template_orig receives orig_nb_pkts entries and task->pkt_template
+ * receives nb_pkts entries; each entry gets a buffer of task->max_frame_size
+ * bytes. Everything is obtained with prox_zmalloc() on task->socket_id.
+ *
+ * do_panic is forwarded to plog_err_or_panic(); pcap only selects the wording
+ * of the error message.
+ *
+ * Returns 0 on success. On failure returns -1 after releasing everything that
+ * was allocated by this call and resetting both array pointers to NULL, so the
+ * task never keeps a partially built template set.
+ */
+static int task_gen_allocate_templates(struct task_gen *task, uint32_t orig_nb_pkts, uint32_t nb_pkts, int do_panic, int pcap)
+{
+       size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
+       size_t orig_mem_size = orig_nb_pkts * sizeof(*task->pkt_template_orig);
+       /* Number of frame buffers successfully allocated so far in each array;
+          used to know exactly what has to be released on failure. */
+       size_t n_bufs = 0;
+       size_t n_orig_bufs = 0;
+
+       task->pkt_template = prox_zmalloc(mem_size, task->socket_id);
+       task->pkt_template_orig = prox_zmalloc(orig_mem_size, task->socket_id);
+
+       if (task->pkt_template == NULL || task->pkt_template_orig == NULL) {
+               /* Report the size of the array that actually failed. */
+               plog_err_or_panic(do_panic, "Failed to allocate %zu bytes (in huge pages) for %s\n",
+                       task->pkt_template == NULL ? mem_size : orig_mem_size,
+                       pcap ? "pcap file":"packet template");
+               goto fail;
+       }
+
+       for (; n_orig_bufs < orig_nb_pkts; n_orig_bufs++) {
+               task->pkt_template_orig[n_orig_bufs].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+               if (task->pkt_template_orig[n_orig_bufs].buf == NULL) {
+                       plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
+                       goto fail;
+               }
+       }
+       for (; n_bufs < nb_pkts; n_bufs++) {
+               task->pkt_template[n_bufs].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+               if (task->pkt_template[n_bufs].buf == NULL) {
+                       plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
+                       goto fail;
+               }
+       }
+       return 0;
+
+fail:
+       /* The counters are 0 when the corresponding array itself could not be
+          allocated, so the arrays are never dereferenced through NULL.
+          rte_free(NULL) is a no-op. */
+       for (size_t i = 0; i < n_bufs; i++)
+               rte_free(task->pkt_template[i].buf);
+       for (size_t i = 0; i < n_orig_bufs; i++)
+               rte_free(task->pkt_template_orig[i].buf);
+       rte_free(task->pkt_template);
+       rte_free(task->pkt_template_orig);
+       task->pkt_template = NULL;
+       task->pkt_template_orig = NULL;
+       return -1;
+}
+
+/* Resize task->pkt_template from task->n_pkts entries to nb_pkts entries.
+ *
+ * Entries that exist in both the old and the new array keep their metadata and
+ * their frame buffer. When growing, each new entry is zeroed and gets a fresh
+ * buffer of task->max_frame_size bytes. When shrinking, the buffers of the
+ * dropped entries are released.
+ *
+ * The old array is only touched once every allocation has succeeded, so on
+ * failure (-1) task->pkt_template and its buffers are exactly as before the
+ * call and remain consistent with task->n_pkts. This function does not update
+ * task->n_pkts; the caller does that after a successful return (0).
+ *
+ * do_panic is forwarded to plog_err_or_panic().
+ */
+static int task_gen_reallocate_templates(struct task_gen *task, uint32_t nb_pkts, int do_panic)
+{
+       const size_t old_nb_pkts = task->n_pkts;
+       const size_t nb_kept = nb_pkts < old_nb_pkts ? nb_pkts : old_nb_pkts;
+       const size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
+       const size_t kept_size = nb_kept * sizeof(*task->pkt_template);
+       struct pkt_template *ptr;
+
+       // Allocate the new array first: nothing of the old state is released before this succeeded
+       ptr = rte_malloc_socket(NULL, mem_size, RTE_CACHE_LINE_SIZE, task->socket_id);
+       if (ptr == NULL) {
+               plog_err_or_panic(do_panic, "Failed to allocate %zu bytes (in huge pages) for packet template for IMIX\n", mem_size);
+               return -1;
+       }
+       memcpy(ptr, task->pkt_template, kept_size);
+       // rte_malloc_socket() does not zero memory: clear the entries that were not copied
+       memset(ptr + nb_kept, 0, mem_size - kept_size);
+
+       // Need to allocate bufs for new template but no need to reallocate for existing ones
+       for (size_t i = old_nb_pkts; i < nb_pkts; ++i) {
+               ptr[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+               if (ptr[i].buf == NULL) {
+                       // Undo this call's allocations; the old array is still intact
+                       for (size_t j = old_nb_pkts; j < i; ++j)
+                               rte_free(ptr[j].buf);
+                       rte_free(ptr);
+                       plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for packet %zu in IMIX\n", task->max_frame_size, i);
+                       return -1;
+               }
+       }
+
+       // Need to free up bufs allocated in previous (longer) imix; rte_free(NULL) is a no-op
+       for (size_t i = nb_pkts; i < old_nb_pkts; i++)
+               rte_free(task->pkt_template[i].buf);
+
+       rte_free(task->pkt_template);
+       task->pkt_template = ptr;
+       return 0;
+}
+
+/* Validate one packet size against the limits of this task.
+ *
+ * A size is accepted when it is non-zero, not larger than
+ * task->max_frame_size and not smaller than an Ethernet header followed by
+ * an IPv4 header.
+ *
+ * With do_panic set, each violated limit is reported through PROX_PANIC().
+ * Otherwise the first violated limit is logged with plog_err() and -1 is
+ * returned. Returns 0 when the size is acceptable.
+ */
+static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
+{
+       const uint16_t smallest = sizeof(prox_rte_ether_hdr) + sizeof(prox_rte_ipv4_hdr);
+       const uint16_t largest = task->max_frame_size;
+
+       if (!do_panic) {
+               if (pkt_size == 0) {
+                       plog_err("Invalid packet size length (no packet defined?)\n");
+                       return -1;
+               }
+               if (pkt_size > largest) {
+                       /* Pick the message depending on whether the size is
+                          beyond a double-tagged standard frame. */
+                       if (pkt_size <= PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE - 4)
+                               plog_err("pkt_size out of range (must be <= (mtu=%u))\n", largest);
+                       else
+                               plog_err("pkt_size too high and jumbo frames disabled\n");
+                       return -1;
+               }
+               if (pkt_size < smallest) {
+                       plog_err("pkt_size out of range (must be >= %u)\n", smallest);
+                       return -1;
+               }
+               return 0;
+       }
+
+       PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
+       PROX_PANIC(pkt_size > largest, "pkt_size out of range (must be <= %u)\n", largest);
+       PROX_PANIC(pkt_size < smallest, "pkt_size out of range (must be >= %u)\n", smallest);
+       return 0;
+}
+
+/* Check that the fields the generator writes into a packet fit inside a
+ * packet of pkt_size bytes.
+ *
+ * Three fields are checked, each only when it is enabled:
+ *  - latency   (task->lat_enabled):   bytes lat_pos       .. lat_pos + 3
+ *  - packet id (task->packet_id_pos): bytes packet_id_pos .. packet_id_pos + 4
+ *  - accuracy  (task->accur_pos):     bytes accur_pos     .. accur_pos + 3
+ *
+ * A field does not fit when pkt_size <= its last byte offset. With do_panic
+ * set this is reported through PROX_PANIC(); otherwise it is logged with
+ * plog_err() and -1 is returned. Returns 0 when every enabled field fits.
+ */
+static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
+{
+       if (task->lat_enabled) {
+               const uint32_t first = task->lat_pos;
+               const uint32_t last = task->lat_pos + 3U;
+
+               if (do_panic) {
+                       PROX_PANIC(pkt_size <= last, "Writing latency at %u-%u, but packet size is %u bytes\n",
+                               first, last, pkt_size);
+               } else if (pkt_size <= last) {
+                       plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", first, last, pkt_size);
+                       return -1;
+               }
+       }
+
+       if (task->packet_id_pos) {
+               const uint32_t first = task->packet_id_pos;
+               const uint32_t last = task->packet_id_pos + 4U;
+
+               if (do_panic) {
+                       PROX_PANIC(pkt_size <= last, "Writing packet at %u-%u, but packet size is %u bytes\n",
+                               first, last, pkt_size);
+               } else if (pkt_size <= last) {
+                       plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", first, last, pkt_size);
+                       return -1;
+               }
+       }
+
+       if (task->accur_pos) {
+               const uint32_t first = task->accur_pos;
+               const uint32_t last = task->accur_pos + 3U;
+
+               if (do_panic) {
+                       PROX_PANIC(pkt_size <= last, "Writing accuracy at %u-%u, but packet size is %u bytes\n",
+                               first, last, pkt_size);
+               } else if (pkt_size <= last) {
+                       plog_err("Writing accuracy at %u-%u, but packet size is %u bytes\n", first, last, pkt_size);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/* Rewrite the length-dependent header fields of the runtime templates so
+ * that they match the requested packet sizes.
+ *
+ * task->pkt_template is treated as nb_pkt_sizes groups of n_orig_pkts
+ * templates; every template of group j is patched for size pkt_sizes[j].
+ *
+ * Templates with l2_len == 0 are left untouched. For the others the IP
+ * version nibble decides what is patched:
+ *  - IPv4: total_length and header checksum, then UDP length + checksum or
+ *    TCP checksum.
+ *  - IPv6: payload_len, then UDP length + checksum or TCP checksum. The L4
+ *    header is taken to follow the fixed IPv6 header directly (no extension
+ *    headers), as checksum_packet() in this file also does.
+ *  - anything else is skipped rather than being overwritten as if it were
+ *    IPv4.
+ * A template is also skipped when pkt_sizes[j] is too small to hold its
+ * L2 + L3 headers, which would otherwise make the L4 length wrap around.
+ *
+ * Always returns 0.
+ */
+static int task_gen_set_eth_ip_udp_sizes(struct task_gen *task, uint32_t n_orig_pkts, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
+{
+       for (size_t j = 0; j < nb_pkt_sizes; ++j) {
+               for (size_t i = 0; i < n_orig_pkts; ++i) {
+                       struct pkt_template *template = &task->pkt_template[j * n_orig_pkts + i];
+                       prox_rte_ipv4_hdr *ip;
+                       uint32_t l4_len;
+
+                       if (template->l2_len == 0)
+                               continue;
+                       ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
+
+                       if (ip->version_ihl >> 4 == 4) {
+                               if (pkt_sizes[j] < (uint32_t)template->l2_len + template->l3_len)
+                                       continue;
+                               ip->total_length = rte_bswap16(pkt_sizes[j] - template->l2_len);
+                               l4_len = pkt_sizes[j] - template->l2_len - template->l3_len;
+                               ip->hdr_checksum = 0;
+                               prox_ip_cksum_sw(ip);
+
+                               if (ip->next_proto_id == IPPROTO_UDP) {
+                                       prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
+                                       udp->dgram_len = rte_bswap16(l4_len);
+                                       prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
+                               } else if (ip->next_proto_id == IPPROTO_TCP) {
+                                       prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
+                                       prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
+                               }
+                       } else if (ip->version_ihl >> 4 == 6) {
+                               prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len);
+
+                               if (pkt_sizes[j] < template->l2_len + sizeof(*ip6))
+                                       continue;
+                               l4_len = pkt_sizes[j] - template->l2_len - sizeof(*ip6);
+                               /* Must be set before the checksum: the IPv6
+                                  pseudo header uses payload_len. */
+                               ip6->payload_len = rte_bswap16(l4_len);
+
+                               if (ip6->proto == IPPROTO_UDP) {
+                                       prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
+                                       udp->dgram_len = rte_bswap16(l4_len);
+                                       udp->dgram_cksum = 0;
+                                       udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
+                               } else if (ip6->proto == IPPROTO_TCP) {
+                                       prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
+                                       tcp->cksum = 0;
+                                       tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
+                               }
+                       }
+               }
+       }
+       return 0;
+}
+
+/* Switch the generator to the pending IMIX configuration.
+ *
+ * task->new_imix_nb_pkts becomes the active number of IMIX sizes, and the
+ * runtime template set is resized to imix_nb_pkts * orig_n_pkts entries when
+ * that differs from the current task->n_pkts. The templates then get their
+ * lengths, content and metadata rebuilt, and finally their size-dependent
+ * header fields rewritten from task->imix_pkt_sizes.
+ *
+ * do_panic is only forwarded to task_gen_reallocate_templates(); its negative
+ * return value is passed back to the caller. Returns 0 otherwise.
+ */
+static int task_gen_apply_imix(struct task_gen *task, int do_panic)
+{
+       uint32_t n_pkts;
+
+       task->imix_nb_pkts = task->new_imix_nb_pkts;
+       n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
+
+       if (n_pkts != task->n_pkts) {
+               int rc = task_gen_reallocate_templates(task, n_pkts, do_panic);
+
+               if (rc < 0)
+                       return rc;
+       }
+
+       task->n_pkts = n_pkts;
+       /* The current index may point past the end of a shorter template set. */
+       if (task->pkt_idx >= n_pkts)
+               task->pkt_idx = 0;
+
+       task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
+       task_gen_reset_pkt_templates_content(task);
+       task_gen_pkt_template_recalc_metadata(task);
+       /* NOTE(review): the results of both checks are ignored, so the new
+          sizes are applied even if a check reports a problem - confirm this
+          is intended. */
+       check_all_pkt_size(task, DO_NOT_PANIC);
+       check_all_fields_in_bounds(task, DO_NOT_PANIC);
+       task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
+       return 0;
+}
+
+/*
+ * Apply configuration changes that were queued on the task: reset the
+ * token-time state when the requested rate differs from the configured one,
+ * and apply a pending IMIX request.
+ */
 static void task_gen_update_config(struct task_gen *task)
 {
        if (task->token_time.cfg.bpp != task->new_rate_bps)
                task_gen_reset_token_time(task);
+       // A non-zero new_imix_nb_pkts is a pending request set by task_gen_set_imix()
+       if (task->new_imix_nb_pkts)
+               task_gen_apply_imix(task, DO_NOT_PANIC);
+       // Clear the request whether or not it could be applied (the return value above is not checked)
+       task->new_imix_nb_pkts = 0;
 }
 
-static inline void handle_arp_pkts(struct task_gen *task, struct rte_mbuf **mbufs, uint16_t n_pkts)
+/*
+ * Recursively enumerate every 32-bit value obtained by setting any subset
+ * of the bits that are set in the original mask, and register each
+ * resulting address (val | fixed_bits, converted to big endian) with the
+ * control plane.
+ *
+ * mask       remaining mask bits, shifted so bit 0 corresponds to bit_pos
+ * bit_pos    bit currently being decided; callers start at 0
+ * val        bits chosen so far; callers start at 0
+ * fixed_bits bits OR-ed into every generated value
+ *
+ * One address is registered per combination, i.e. 2^(number of set mask bits).
+ */
+static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
 {
-       int j;
-       int ret;
-       struct ether_hdr_arp *hdr;
-       uint8_t out[MAX_PKT_BURST];
-       static struct my_arp_t arp_reply = {
-               .htype = 0x100,
-               .ptype = 8,
-               .hlen = 6,
-               .plen = 4,
-               .oper = 0x200
-       };
-       static struct my_arp_t arp_request = {
-               .htype = 0x100,
-               .ptype = 8,
-               .hlen = 6,
-               .plen = 4,
-               .oper = 0x100
-       };
-
-       for (j = 0; j < n_pkts; ++j) {
-               PREFETCH0(mbufs[j]);
-       }
-       for (j = 0; j < n_pkts; ++j) {
-               PREFETCH0(rte_pktmbuf_mtod(mbufs[j], void *));
-       }
-       for (j = 0; j < n_pkts; ++j) {
-               hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *);
-               if (hdr->ether_hdr.ether_type == ETYPE_ARP) {
-                       if (memcmp(&hdr->arp, &arp_reply, 8) == 0) {
-                               uint32_t ip = hdr->arp.data.spa;
-                               // plog_info("Received ARP Reply for IP %x\n",ip);
-                               if (ip == task->gw_ip) {
-                                       memcpy(&task->gw_mac, &hdr->arp.data.sha, 6);;
-                                       task->flags |= FLAG_DST_MAC_KNOWN;
-                                       out[j] = OUT_HANDLED;
-                                       continue;
-                               } else if ((task->n_pkts >= 4) || (task->flags & FLAG_RANDOM_IPS)) {
-                                       // Ideally, we should add the key when making the arp request,
-                                       // We should only store the mac address key was created.
-                                       // Here we are storing MAC we did not asked for...
-                                       ret = rte_hash_add_key(task->mac_hash, (const void *)&ip);
-                                       if (ret < 0) {
-                                               plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip));
-                                               out[j] = OUT_DISCARD;
-                                       } else {
-                                               task->dst_mac[ret] = *(uint64_t *)&(hdr->arp.data.sha);
-                                               out[j] = OUT_HANDLED;
-                                       }
+       struct task_base *tbase = (struct task_base *)task;
+       if (bit_pos < 32) {
+               // Branch with the current bit cleared...
+               build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits);
+               if (mask & 1) {
+                       // ...and, when the mask allows it, with the bit set.
+                       // 1U keeps the shift unsigned: (1 << 31) on a signed int is undefined.
+                       build_value(task, mask >> 1, bit_pos + 1, val | (1U << bit_pos), fixed_bits);
+               }
+       } else {
+               // All 32 bits decided: register the resulting address
+               register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+       }
+}
+
+/*
+ * IPv6 counterpart of build_value(): recursively enumerate every address
+ * obtained by setting any subset of the bits selected by the 32-bit mask,
+ * and register each one with the control plane.
+ *
+ * mask             remaining mask bits, bit 0 corresponds to var_bit_pos
+ * var_bit_pos      bit of the 32-bit random field being decided; start at 0
+ * init_var_bit_pos bit offset of the random field inside the address
+ * val              address bits chosen so far (passed by value, so each
+ *                  recursion branch works on its own copy)
+ * fixed_bits       bits OR-ed into every generated address
+ */
+static inline void build_value_ipv6(struct task_gen *task, uint32_t mask, int var_bit_pos, int init_var_bit_pos, struct ipv6_addr val, struct ipv6_addr fixed_bits)
+{
+       struct task_base *tbase = (struct task_base *)task;
+       if (var_bit_pos < 32) {
+               // Branch with the current bit cleared
+               build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
+               if (mask & 1) {
+                       // Branch with the current bit set in the matching address byte
+                       int byte_pos = (var_bit_pos + init_var_bit_pos) / 8;
+                       int bit_pos = (var_bit_pos + init_var_bit_pos) % 8;
+                       val.bytes[byte_pos] = val.bytes[byte_pos] | (1 << bit_pos);
+                       build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
+               }
+       } else {
+               // Merge the fixed bits into every byte of the address (bytes[] is
+               // indexed up to sizeof(struct ipv6_addr) elsewhere in this file)
+               for (uint i = 0; i < sizeof(struct ipv6_addr); i++)
+                       val.bytes[i] = val.bytes[i] | fixed_bits.bytes[i];
+               register_node_to_ctrl_plane(tbase->l3.tmaster, &null_addr, &val, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+       }
+}
+
+/*
+ * Register with the control plane every source IP address this task can
+ * generate. For each packet template:
+ *  - skip over VLAN tags (and one MPLS label) to find the IP header;
+ *  - register the template's source address (IPv4 or IPv6);
+ *  - for every configured random field that overlaps the source address,
+ *    register all addresses the randomization can produce.
+ * Templates that carry neither IPv4 nor IPv6 are ignored. Panics when the
+ * IP version does not match the ether type or the enabled L3/NDP mode.
+ */
+static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
+{
+       struct task_base *tbase = (struct task_base *)task;
+       int len, fixed;
+       unsigned int offset;
+       uint32_t mask, ip_len;
+       struct ipv6_addr *ip6_src = NULL;
+       uint32_t *ip_src;
+
+       for (uint32_t i = 0; i < task->n_pkts; ++i) {
+               struct pkt_template *pktpl = &task->pkt_template[i];
+               unsigned int ip_src_pos = 0;
+               int ipv4 = 0;
+               unsigned int l2_len = sizeof(prox_rte_ether_hdr);
+
+               uint8_t *pkt = pktpl->buf;
+               prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
+               uint16_t ether_type = eth_hdr->ether_type;
+               prox_rte_vlan_hdr *vlan_hdr;
+               prox_rte_ipv4_hdr *ip;
+
+               // Unstack VLAN tags
+               while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) {
+                       vlan_hdr = (prox_rte_vlan_hdr *)(pkt + l2_len);
+                       l2_len +=4;
+                       ether_type = vlan_hdr->eth_proto;
+               }
+               if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
+                       // Only a single 4-byte MPLS label is skipped; the payload type is guessed from the IP version nibble
+                       l2_len +=4;
+                       ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
+                       if (ip->version_ihl >> 4 == 4)
+                               ipv4 = 1;
+                       else if (ip->version_ihl >> 4 != 6)     // Version field at same location for IPv4 and IPv6
+                               continue;
+               } else if (ether_type == ETYPE_IPv4) {
+                       ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
+                       PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);        // Invalid Packet
+                       ipv4 = 1;
+               } else if (ether_type == ETYPE_IPv6) {
+                       ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
+                       PROX_PANIC(ip->version_ihl >> 4 != 6, "IPv6 ether_type but IP version = %d != 6", ip->version_ihl >> 4);        // Invalid Packet
+               } else {
+                       continue;
+               }
+
+               PROX_PANIC(ipv4 && ((prox_cfg.flags & DSF_L3_ENABLED) == 0), "Trying to generate an IPv4 packet in NDP mode => not supported\n");
+               PROX_PANIC((ipv4 == 0) && ((prox_cfg.flags & DSF_NDP_ENABLED) == 0), "Trying to generate an IPv6 packet in L3 (IPv4) mode => not supported\n");
+               if (ipv4) {
+                       // Even if IPv4 header contains options, options are after ip src and dst
+                       ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t);
+                       ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
+                       plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
+                       register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+                       ip_len = sizeof(uint32_t);
+               } else {
+                       ip_src_pos = l2_len + sizeof(prox_rte_ipv6_hdr) - 2 * sizeof(struct ipv6_addr);
+                       ip6_src = ((struct ipv6_addr *)(pktpl->buf + ip_src_pos));
+                       plog_info("\tip_src_pos = %d, ip6_src = "IPv6_BYTES_FMT"\n", ip_src_pos, IPv6_BYTES(ip6_src->bytes));
+                       register_node_to_ctrl_plane(tbase->l3.tmaster, ip6_src, &null_addr, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
+                       ip_len = sizeof(struct ipv6_addr);
+               }
+
+               for (int j = 0; j < task->n_rands; j++) {
+                       offset = task->rand[j].rand_offset;
+                       len = task->rand[j].rand_len;
+                       mask = task->rand[j].rand_mask;
+                       fixed = task->rand[j].fixed_bits;
+                       plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
+                       if (offset >= ip_src_pos + ip_len)      // First random bit after IP
+                               continue;
+                       // A field ending exactly at ip_src_pos does not touch the IP either
+                       // (and would otherwise lead to a 32-bit shift below)
+                       if (offset + len <= ip_src_pos)         // Last random bit before IP
+                               continue;
+
+                       if (ipv4) {
+                               if (offset >= ip_src_pos) {
+                                       // Random field starts inside the IP: keep only the part of mask/fixed that
+                                       // lands in the address. Built in 64 bits because the shift is 32 when
+                                       // the field starts exactly at the IP.
+                                       uint32_t ip_src_mask = (uint32_t)((1ULL << ((4 + ip_src_pos - offset) * 8)) - 1);
+                                       mask = mask & ip_src_mask;
+                                       fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask);
+                                       build_value(task, mask, 0, 0, fixed);
+                               } else {
+                                       // Random field starts before the IP: shift it up so that it lines up
+                                       // with the most significant bytes of the address
+                                       int32_t bits = ((ip_src_pos + 4 - offset - len) * 8);
+                                       mask = mask << bits;
+                                       fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
+                                       build_value(task, mask, 0, 0, fixed);
+                               }
+                       } else {
+                               // We do not support when random partially covers IP - either starting before or finishing after
+                               if (offset + len >= ip_src_pos + ip_len) { // len over the ip
+                                       plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
                                        continue;
                                }
-                               // Need to find template back...
-                               // Only try this if there are few templates
-                               for (unsigned int idx = 0; idx < task->n_pkts; idx++) {
-                                       struct pkt_template *pktpl = &task->pkt_template[idx];
-                                       uint32_t ip_dst_pos = pktpl->ip_dst_pos;
-                                       uint32_t *ip_dst = (uint32_t *)(((uint8_t *)pktpl->buf) + ip_dst_pos);
-                                       if (*ip_dst == ip) {
-                                               pktpl->dst_mac = *(uint64_t *)&(hdr->arp.data.sha);
-                                       }
-                                       out[j] = OUT_HANDLED;
+                               if (offset < ip_src_pos) {
+                                       plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
+                                       continue;
                                }
-                       } else if (memcmp(&hdr->arp, &arp_request, 8) == 0) {
-                               struct ether_addr s_addr;
-                               if (!task->src_ip) {
-                                       create_mac(hdr, &s_addr);
-                                       prepare_arp_reply(hdr, &s_addr);
-                                       memcpy(hdr->ether_hdr.d_addr.addr_bytes, hdr->ether_hdr.s_addr.addr_bytes, 6);
-                                       memcpy(hdr->ether_hdr.s_addr.addr_bytes, &s_addr, 6);
-                                       out[j] = 0;
-                               } else if (hdr->arp.data.tpa == task->src_ip) {
-                                       prepare_arp_reply(hdr, &task->src_mac);
-                                       memcpy(hdr->ether_hdr.d_addr.addr_bytes, hdr->ether_hdr.s_addr.addr_bytes, 6);
-                                       memcpy(hdr->ether_hdr.s_addr.addr_bytes, &task->src_mac, 6);
-                                       out[j] = 0;
-                               } else {
-                                       out[j] = OUT_DISCARD;
-                                       plogx_dbg("Received ARP on unexpected IP %x, expecting %x\n", rte_be_to_cpu_32(hdr->arp.data.tpa), rte_be_to_cpu_32(task->src_ip));
+                               // Even for IPv6 the random mask supported by PROX are 32 bits only
+                               struct ipv6_addr fixed_ipv6;
+                               uint init_var_byte_pos = (offset - ip_src_pos);
+                               for (uint i = 0; i < sizeof(struct ipv6_addr); i++) {
+                                       if (i < init_var_byte_pos)
+                                               fixed_ipv6.bytes[i] = ip6_src->bytes[i];
+                                       else if (i < init_var_byte_pos + len)
+                                               // NOTE(review): shifts by a byte index, not by 8 * index - confirm intended byte extraction
+                                               fixed_ipv6.bytes[i] = (fixed >> (i - init_var_byte_pos)) & 0xFF;
+                                       else
+                                               fixed_ipv6.bytes[i] = ip6_src->bytes[i];
                                }
+                               build_value_ipv6(task, mask, 0, init_var_byte_pos * 8, null_addr, fixed_ipv6);
                        }
-               } else {
-                       out[j] = OUT_DISCARD;
                }
        }
-       ret = task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
 }
 
 static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
@@ -786,10 +926,6 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
 
        int i, j;
 
-       if (unlikely((task->flags & FLAG_L3_GEN) && (n_pkts != 0))) {
-               handle_arp_pkts(task, mbufs, n_pkts);
-       }
-
        task_gen_update_config(task);
 
        if (task->pkt_count == 0) {
@@ -802,7 +938,7 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
        token_time_update(&task->token_time, rte_rdtsc());
 
        uint32_t would_send_bytes;
-       const uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
+       uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
 
        if (send_bulk == 0)
                return 0;
@@ -817,10 +953,7 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
        task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
        task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
        task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
-       if (task_gen_write_dst_mac(task, new_pkts, pkt_hdr, send_bulk) < 0)
-               return 0;
        task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
-       task_gen_apply_all_sig(task, new_pkts, pkt_hdr, send_bulk);
        task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
 
        uint64_t tsc_before_tx;
@@ -829,6 +962,20 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin
        task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
        ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
        task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
+
+       // If we failed to send some packets, we need to do some clean-up:
+
+       if (unlikely(ret)) {
+               // We need re-use the packets indexes not being sent
+               // Hence non-sent packets will not be considered as lost by the receiver when it looks at
+               // packet ids. This should also increase the percentage of packets used for latency measurements
+               task->pkt_queue_index -= ret;
+
+               // In case of failures, the estimate about when we can send next packet (earliest_tsc_next_pkt) is wrong
+               // This would result in under-estimated latency (up to 0 or negative)
+               uint64_t bulk_duration = task_gen_calc_bulk_duration(task, ret);
+               task->earliest_tsc_next_pkt -= bulk_duration;
+       }
        return ret;
 }
 
@@ -838,14 +985,17 @@ static void init_task_gen_seeds(struct task_gen *task)
                random_init_seed(&task->rand[i].state);
 }
 
-static uint32_t pcap_count_pkts(pcap_t *handle)
+static uint32_t pcap_count_pkts(pcap_t *handle, uint32_t *max_frame_size)
 {
        struct pcap_pkthdr header;
        const uint8_t *buf;
        uint32_t ret = 0;
+       *max_frame_size = 0;
        long pkt1_fpos = ftell(pcap_file(handle));
 
        while ((buf = pcap_next(handle, &header))) {
+               if (header.len > *max_frame_size)
+                       *max_frame_size = header.len;
                ret++;
        }
        int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
@@ -862,7 +1012,7 @@ static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
        return (tot_inter_pkt + n / 2)/n;
 }
 
-static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp)
+static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp, uint32_t max_frame_size)
 {
        struct pcap_pkthdr header;
        const uint8_t *buf;
@@ -873,7 +1023,7 @@ static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts
 
                PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
                proto[i].len = header.len;
-               len = RTE_MIN(header.len, sizeof(proto[i].buf));
+               len = RTE_MIN(header.len, max_frame_size);
                if (header.len > len)
                        plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
 
@@ -905,33 +1055,6 @@ static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts
        return 0;
 }
 
-static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
-{
-       const uint16_t min_len = sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr);
-       const uint16_t max_len = ETHER_MAX_LEN - 4;
-
-       if (do_panic) {
-               PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
-               PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
-               PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
-               return 0;
-       } else {
-               if (pkt_size == 0) {
-                       plog_err("Invalid packet size length (no packet defined?)\n");
-                       return -1;
-               }
-               if (pkt_size > max_len) {
-                       plog_err("pkt_size out of range (must be <= %u)\n", max_len);
-                       return -1;
-               }
-               if (pkt_size < min_len) {
-                       plog_err("pkt_size out of range (must be >= %u)\n", min_len);
-                       return -1;
-               }
-               return 0;
-       }
-}
-
 static int check_all_pkt_size(struct task_gen *task, int do_panic)
 {
        int rc;
@@ -942,31 +1065,14 @@ static int check_all_pkt_size(struct task_gen *task, int do_panic)
        return 0;
 }
 
-static void check_fields_in_bounds(struct task_gen *task)
+/*
+ * Run check_fields_in_bounds() against the length of every packet template.
+ * Stops at the first template that fails and returns that non-zero result;
+ * returns 0 when all templates pass. do_panic is passed through unchanged.
+ */
+static int check_all_fields_in_bounds(struct task_gen *task, int do_panic)
 {
-       const uint32_t pkt_size = task->pkt_template[0].len;
-
-       if (task->lat_enabled) {
-               uint32_t pos_beg = task->lat_pos;
-               uint32_t pos_end = task->lat_pos + 3U;
-
-               PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
-                          pos_beg, pos_end, pkt_size);
-       }
-       if (task->packet_id_pos) {
-               uint32_t pos_beg = task->packet_id_pos;
-               uint32_t pos_end = task->packet_id_pos + 4U;
-
-               PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
-                          pos_beg, pos_end, pkt_size);
-       }
-       if (task->accur_pos) {
-               uint32_t pos_beg = task->accur_pos;
-               uint32_t pos_end = task->accur_pos + 3U;
-
-               PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u%-u, but packet size is %u bytes\n",
-                          pos_beg, pos_end, pkt_size);
+       for (uint32_t tpl_idx = 0; tpl_idx < task->n_pkts; ++tpl_idx) {
+               const int status = check_fields_in_bounds(task, task->pkt_template[tpl_idx].len, do_panic);
+
+               if (status != 0)
+                       return status;
        }
+       return 0;
 }
 
 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
@@ -982,25 +1088,37 @@ static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
 static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
 {
        struct pkt_template *template;
-       struct ipv4_hdr *ip;
+       prox_rte_ipv4_hdr *ip;
 
        task->runtime_checksum_needed = 0;
        for (size_t i = 0; i < task->n_pkts; ++i) {
                template = &task->pkt_template[i];
                if (template->l2_len == 0)
                        continue;
-               ip = (struct ipv4_hdr *)(template->buf + template->l2_len);
-
-               ip->hdr_checksum = 0;
-               prox_ip_cksum_sw(ip);
-               uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
-
-               if (ip->next_proto_id == IPPROTO_UDP) {
-                       struct udp_hdr *udp = (struct udp_hdr *)(((uint8_t *)ip) + template->l3_len);
-                       prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
-               } else if (ip->next_proto_id == IPPROTO_TCP) {
-                       struct tcp_hdr *tcp = (struct tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
-                       prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
+               ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
+               if (ip->version_ihl >> 4 == 4) {
+                       ip->hdr_checksum = 0;
+                       prox_ip_cksum_sw(ip);
+                       uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
+                       if (ip->next_proto_id == IPPROTO_UDP) {
+                               prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
+                               prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
+                       } else if (ip->next_proto_id == IPPROTO_TCP) {
+                               prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
+                               prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
+                       }
+               } else if (ip->version_ihl >> 4 == 6) {
+                       prox_rte_ipv6_hdr *ip6;
+                       ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len);
+                       if (ip6->proto == IPPROTO_UDP) {
+                               prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
+                               udp->dgram_cksum = 0;
+                               udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
+                       } else if (ip6->proto == IPPROTO_TCP) {
+                               prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
+                               tcp->cksum = 0;
+                               tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
+                       }
                }
 
                /* The current implementation avoids checksum
@@ -1022,14 +1140,28 @@ static void task_gen_pkt_template_recalc_all(struct task_gen *task)
        task_gen_pkt_template_recalc_checksum(task);
 }
 
+/*
+ * Set the length of every packet template from a list of packet sizes.
+ * Templates are organised as n_pkts / orig_n_pkts consecutive groups of
+ * orig_n_pkts entries; all templates of group j get length pkt_sizes[j].
+ * pkt_sizes must hold at least n_pkts / orig_n_pkts entries.
+ */
+static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes)
+{
+       // Without original packets there is nothing to size, and the division below would be by zero
+       if (task->orig_n_pkts == 0)
+               return;
+
+       const size_t nb_groups = task->n_pkts / task->orig_n_pkts;
+
+       for (size_t j = 0; j < nb_groups; ++j) {
+               for (size_t i = 0; i < task->orig_n_pkts; ++i)
+                       task->pkt_template[j * task->orig_n_pkts + i].len = pkt_sizes[j];
+       }
+}
+
+/*
+ * Restore the length of every packet template from the original templates.
+ * The working array holds n_pkts / orig_n_pkts consecutive copies of the
+ * orig_n_pkts original templates; entry i of each copy takes the length of
+ * original template i.
+ * NOTE(review): divides by orig_n_pkts - presumably never 0 here; confirm.
+ */
 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
 {
        struct pkt_template *src, *dst;
 
-       for (size_t i = 0; i < task->n_pkts; ++i) {
-               src = &task->pkt_template_orig[i];
-               dst = &task->pkt_template[i];
-               dst->len = src->len;
+       for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
+               for (size_t i = 0; i < task->orig_n_pkts; ++i) {
+                       src = &task->pkt_template_orig[i];
+                       dst = &task->pkt_template[j * task->orig_n_pkts + i];
+                       dst->len = src->len;
+               }
        }
 }
 
@@ -1037,82 +1169,105 @@ static void task_gen_reset_pkt_templates_content(struct task_gen *task)
 {
        struct pkt_template *src, *dst;
 
-       for (size_t i = 0; i < task->n_pkts; ++i) {
-               src = &task->pkt_template_orig[i];
-               dst = &task->pkt_template[i];
-               memcpy(dst->buf, src->buf, dst->len);
+       for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
+               for (size_t i = 0; i < task->orig_n_pkts; ++i) {
+                       src = &task->pkt_template_orig[i];
+                       dst = &task->pkt_template[j * task->orig_n_pkts + i];
+                       memcpy(dst->buf, src->buf, RTE_MAX(src->len, dst->len));
+                       if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
+                               rte_memcpy(&dst->buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
+                       }
+                       task_gen_apply_sig(task, dst);
+               }
        }
 }
 
+/*
+ * Rebuild all working packet templates: set their lengths (from the IMIX
+ * sizes when an IMIX is active, otherwise from the original templates),
+ * reset their contents, then recalculate everything derived from them via
+ * task_gen_pkt_template_recalc_all().
+ */
 static void task_gen_reset_pkt_templates(struct task_gen *task)
 {
-       task_gen_reset_pkt_templates_len(task);
+       // imix_nb_pkts != 0 means an IMIX is configured
+       if (task->imix_nb_pkts)
+               task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
+       else
+               task_gen_reset_pkt_templates_len(task);
        task_gen_reset_pkt_templates_content(task);
        task_gen_pkt_template_recalc_all(task);
 }
 
+/*
+ * Initialise the packet templates from the single inline packet given in
+ * the task arguments. Without IMIX one template of targ->pkt_size bytes is
+ * created; with IMIX one template per IMIX size. Allocation and validation
+ * are requested with DO_PANIC.
+ */
 static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
 {
-       const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
-
-       if (targ->pkt_size > sizeof(task->pkt_template[0].buf))
-               targ->pkt_size = sizeof(task->pkt_template[0].buf);
-       task->n_pkts = 1;
-
-       size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
-       task->pkt_template = prox_zmalloc(mem_size, socket_id);
-       task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
+       int rc;
 
-       PROX_PANIC(task->pkt_template == NULL ||
-                  task->pkt_template_orig == NULL,
-                  "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
+       // The inline definition provides exactly one original packet
+       task->orig_n_pkts = 1;
+       if (task->imix_nb_pkts == 0) {
+               // No IMIX: a single template whose size is stored in slot 0 of the IMIX size table
+               task->n_pkts = 1;
+               task->imix_pkt_sizes[0] = targ->pkt_size;
+       } else {
+               task->n_pkts = task->imix_nb_pkts;
+       }
+       task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, NOT_FROM_PCAP);
 
-       rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, targ->pkt_size);
-       task->pkt_template_orig[0].len = targ->pkt_size;
+       // NOTE(review): copies max_frame_size bytes - assumes targ->pkt_inline is at least that large; confirm
+       rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, task->max_frame_size);
+       task->pkt_template_orig[0].len = task->imix_pkt_sizes[0];
        task_gen_reset_pkt_templates(task);
-       check_all_pkt_size(task, 1);
-       check_fields_in_bounds(task);
+       check_all_pkt_size(task, DO_PANIC);
+       check_all_fields_in_bounds(task, DO_PANIC);
+
+       // If IMIX was not specified then pkt_size is specified using pkt_size parameter or the length of pkt_inline
+       // In that case, for backward compatibility, we do NOT adapt the length of IP and UDP to the length of the packet
+       // (with imix_nb_pkts == 0 the call below iterates over zero packet sizes)
+       task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
 }
 
+/*
+ * Initialise the packet templates from the pcap file named in the task
+ * arguments. The packets are counted first (optionally capped by
+ * targ->n_pkts), the largest frame is checked against the task's
+ * max_frame_size, and one template per packet - multiplied by the number
+ * of IMIX sizes when an IMIX is active - is allocated and filled.
+ * Panics when the file cannot be opened or a frame is too large.
+ */
 static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
 {
-       const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
        char err[PCAP_ERRBUF_SIZE];
+       uint32_t max_frame_size;
        pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
        PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
 
-       task->n_pkts = pcap_count_pkts(handle);
-       plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
+       // First pass over the file: number of packets and largest frame
+       task->orig_n_pkts = pcap_count_pkts(handle, &max_frame_size);
+       plogx_info("%u packets in pcap file '%s'; max frame size=%d\n", task->orig_n_pkts, targ->pcap_file, max_frame_size);
+       // The message distinguishes frames beyond the standard (double-tagged) Ethernet size from frames merely beyond the configured limit
+       PROX_PANIC(max_frame_size > task->max_frame_size,
+               max_frame_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE -4 ?
+                       "pkt_size too high and jumbo frames disabled" : "pkt_size > mtu");
 
        if (targ->n_pkts)
-               task->n_pkts = RTE_MIN(task->n_pkts, targ->n_pkts);
-       PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
+               task->orig_n_pkts = RTE_MIN(task->orig_n_pkts, targ->n_pkts);
+       if (task->imix_nb_pkts == 0) {
+               task->n_pkts = task->orig_n_pkts;
+       } else {
+               // One copy of every pcap packet per IMIX size
+               task->n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
+       }
+       task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, FROM_PCAP);
        plogx_info("Loading %u packets from pcap\n", task->n_pkts);
-       size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
-       task->pkt_template = prox_zmalloc(mem_size, socket_id);
-       task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
-       PROX_PANIC(task->pkt_template == NULL ||
-                  task->pkt_template_orig == NULL,
-                  "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
-
-       pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->pkt_template_orig, NULL);
+
+       // Second pass: load the packets into the original templates (no timestamps requested)
+       pcap_read_pkts(handle, targ->pcap_file, task->orig_n_pkts, task->pkt_template_orig, NULL, max_frame_size);
        pcap_close(handle);
        task_gen_reset_pkt_templates(task);
+       check_all_pkt_size(task, DO_PANIC);
+       check_all_fields_in_bounds(task, DO_PANIC);
+       task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
 }
 
-static struct rte_mempool *task_gen_create_mempool(struct task_args *targ)
+/*
+ * Create the mbuf pool used by a generator task on the socket of the task's
+ * lcore. Elements are TX_MBUF_SIZE bytes, enlarged when needed so that a
+ * frame of max_frame_size plus the rte_mbuf structure and headroom fits.
+ * Panics when the pool cannot be created; otherwise returns the pool.
+ */
+static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint16_t max_frame_size)
 {
        static char name[] = "gen_pool";
        struct rte_mempool *ret;
        const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
 
+       // Bump the first character of the static name so that each call uses a different pool name
        name[0]++;
-       ret = rte_mempool_create(name, targ->nb_mbuf - 1, MBUF_SIZE,
+       uint32_t mbuf_size = TX_MBUF_SIZE;
+       if (max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size)
+               mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
+       plog_info("\t\tCreating mempool with name '%s'\n", name);
+       ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size,
                                 targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
                                 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
                                 sock_id, 0);
        PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
                   sock_id, targ->nb_mbuf - 1);
+
+        plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret,
+                  targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id);
+
        return ret;
 }
 
@@ -1128,18 +1283,33 @@ int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
        struct task_gen *task = (struct task_gen *)tbase;
        int rc;
 
-       task->pkt_template[0].len = pkt_size;
-       if ((rc = check_all_pkt_size(task, 0)) != 0)
-               return rc;
-       check_fields_in_bounds(task);
-       return rc;
+       for (size_t i = 0; i < task->n_pkts; ++i) {
+               if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
+                       return rc;
+               if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
+                       return rc;
+       }
+       for (size_t i = 0; i < task->n_pkts; ++i) {
+               task->pkt_template[i].len = pkt_size;
+       }
+       return 0;
 }
 
-void task_gen_set_gateway_ip(struct task_base *tbase, uint32_t ip)
+int task_gen_set_imix(struct task_base *tbase, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
 {
        struct task_gen *task = (struct task_gen *)tbase;
-       task->gw_ip = ip;
-       task->flags &= ~FLAG_DST_MAC_KNOWN;
+       int rc;
+
+       for (size_t i = 0; i < nb_pkt_sizes; ++i) {
+               if ((rc = check_pkt_size(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
+                       return rc;
+               if ((rc = check_fields_in_bounds(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
+                       return rc;
+       }
+       // only commit the sizes and new_imix_nb_pkts once every size passed the checks, so a rejected request leaves the task untouched
+       memcpy(task->imix_pkt_sizes, pkt_sizes, nb_pkt_sizes * sizeof(uint32_t));
+       task->new_imix_nb_pkts = nb_pkt_sizes;
+       return 0;
 }
 
 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
@@ -1159,13 +1329,14 @@ void task_gen_reset_randoms(struct task_base *tbase)
                task->rand[i].rand_offset = 0;
        }
        task->n_rands = 0;
-       task->flags &= ~FLAG_RANDOM_IPS;
 }
 
 int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
 {
        struct task_gen *task = (struct task_gen *)tbase;
 
+       if (offset + len > task->max_frame_size)
+               return -1;
        for (size_t i = 0; i < task->n_pkts; ++i) {
                uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
                uint8_t *dst = task->pkt_template[i].buf;
@@ -1183,6 +1354,16 @@ void task_gen_reset_values(struct task_base *tbase)
        struct task_gen *task = (struct task_gen *)tbase;
 
        task_gen_reset_pkt_templates_content(task);
+       task_gen_pkt_template_recalc_metadata(task);
+       check_all_pkt_size(task, DO_NOT_PANIC);
+       check_all_fields_in_bounds(task, DO_NOT_PANIC);
+       task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
+
+       if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
+               for (uint32_t i = 0; i < task->n_pkts; ++i) {
+                       rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
+               }
+       }
 }
 
 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
@@ -1195,40 +1376,44 @@ uint32_t task_gen_get_n_randoms(struct task_base *tbase)
 static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
 {
        struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
-       const uint32_t sockid = rte_lcore_to_socket_id(targ->lconf->id);
+       task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
+       uint32_t max_frame_size;
 
        task->loop = targ->loop;
        task->pkt_idx = 0;
        task->hz = rte_get_tsc_hz();
 
-       task->local_mbuf.mempool = task_gen_create_mempool(targ);
-
-       PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
-
        char err[PCAP_ERRBUF_SIZE];
        pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
        PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
 
-       task->n_pkts = pcap_count_pkts(handle);
+       task->n_pkts = pcap_count_pkts(handle, &max_frame_size);
        plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
 
+       task->local_mbuf.mempool = task_gen_create_mempool(targ, max_frame_size);
+
+       PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
+
        if (targ->n_pkts) {
                plogx_info("Configured to load %u packets\n", targ->n_pkts);
                if (task->n_pkts > targ->n_pkts)
                        task->n_pkts = targ->n_pkts;
        }
-       PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
-
        plogx_info("Loading %u packets from pcap\n", task->n_pkts);
 
        size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
-       uint8_t *mem = prox_zmalloc(mem_size, sockid);
+       uint8_t *mem = prox_zmalloc(mem_size, task->socket_id);
 
        PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
        task->proto = (struct pkt_template *) mem;
        task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
 
-       pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc);
+       for (uint i = 0; i < targ->n_pkts; i++) {
+               task->proto[i].buf = prox_zmalloc(max_frame_size, task->socket_id);
+               PROX_PANIC(task->proto[i].buf == NULL, "Failed to allocate %u bytes (in huge pages) for pcap file\n", max_frame_size);
+       }
+
+       pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc, max_frame_size);
        pcap_close(handle);
 }
 
@@ -1276,22 +1461,45 @@ int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t of
        task->rand[task->n_rands].rand_mask = mask;
        task->rand[task->n_rands].fixed_bits = fixed;
 
-       struct pkt_template *pktpl = &task->pkt_template[0];
-       if (!((offset >= pktpl->ip_dst_pos + 4) || (offset + len < pktpl->ip_dst_pos))) {
-               plog_info("\tUsing randoms IP destinations\n");
-               task->flags |= FLAG_RANDOM_IPS;
-       }
-
        task->n_rands++;
        return 0;
 }
 
+static void start(struct task_base *tbase)
+{
+       struct task_gen *task = (struct task_gen *)tbase;
+       task->pkt_queue_index = 0;
+
+       task_gen_reset_token_time(task);
+       if (tbase->l3.tmaster) {
+               register_all_ip_to_ctrl_plane(task);
+       }
+
+       /* TODO
+          Handle the case when two tasks transmit to the same port
+          and one of them is stopped. In that case ARP (requests or replies)
+          might not be sent. Master will have to keep a list of rings.
+          stop will have to de-register IP from ctrl plane.
+          un-registration will remove the ring. when having more than
+          one active rings, master can always use the first one
+       */
+}
+
+static void start_pcap(struct task_base *tbase)
+{
+       struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
+       /* When we start, the first packet is sent immediately. */
+       task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
+       task->pkt_idx = 0;
+}
+
 static void init_task_gen_early(struct task_args *targ)
 {
        uint8_t *generator_count = prox_sh_find_system("generator_count");
 
        if (generator_count == NULL) {
-               generator_count = prox_zmalloc(sizeof(*generator_count), 0);
+               generator_count = prox_zmalloc(sizeof(*generator_count), rte_lcore_to_socket_id(targ->lconf->id));
+               PROX_PANIC(generator_count == NULL, "Failed to allocate generator count\n");
                prox_sh_add_system("generator_count", generator_count);
        }
        targ->generator_id = *generator_count;
@@ -1301,10 +1509,21 @@ static void init_task_gen_early(struct task_args *targ)
 static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 {
        struct task_gen *task = (struct task_gen *)tbase;
+       task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
 
        task->packet_id_pos = targ->packet_id_pos;
 
-       task->local_mbuf.mempool = task_gen_create_mempool(targ);
+       struct prox_port_cfg *port = find_reachable_port(targ);
+       // TODO: check that all reachable ports have the same mtu...
+       if (port) {
+               task->cksum_offload = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
+               task->port = port;
+               task->max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + 2 * PROX_VLAN_TAG_SIZE;
+       } else {
+               // Not generating to any port...
+               task->max_frame_size = PROX_RTE_ETHER_MAX_LEN;
+       }
+       task->local_mbuf.mempool = task_gen_create_mempool(targ, task->max_frame_size);
        PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
        task->pkt_idx = 0;
        task->hz = rte_get_tsc_hz();
@@ -1314,9 +1533,16 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
        task->sig = targ->sig;
        task->new_rate_bps = targ->rate_bps;
 
+       /*
+        * For tokens, use 10 Gbps as base rate
+        * Scripts can then use speed command, with speed=100 as 10 Gbps and speed=400 as 40 Gbps
+        * Script can query prox "port info" command to find out the port link speed to know
+        * at which rate to start. Note that virtio running on OVS returns 10 Gbps, so a script has
+        * probably also to check the driver (as returned by the same "port info" command).
+        */
        struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
-
        token_time_init(&task->token_time, &tt_cfg);
+
        init_task_gen_seeds(task);
 
        task->min_bulk_size = targ->min_bulk_size;
@@ -1334,98 +1560,55 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
        PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat not enabled by lat pos or accur pos configured\n");
 
        task->generator_id = targ->generator_id;
-       task->link_speed = UINT64_MAX;
-       if (targ->nb_txrings == 0 && targ->nb_txports == 1)
-               task->link_speed = 1250000000;
+       plog_info("\t\tGenerator id = %d\n", task->generator_id);
+
+       // Allocate array holding bytes to tsc for supported frame sizes
+       task->bytes_to_tsc = prox_zmalloc(task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]), task->socket_id);
+       PROX_PANIC(task->bytes_to_tsc == NULL,
+               "Failed to allocate %u bytes (in huge pages) for bytes_to_tsc\n", task->max_frame_size);
+
+       // task->port->max_link_speed reports the maximum, non negotiated link speed in Mbps e.g. 40k for a 40 Gbps NIC.
+       // It can be UINT32_MAX (virtual devices or not supported by DPDK < 16.04)
+       uint64_t bytes_per_hz = UINT64_MAX;
+       if ((task->port) && (task->port->max_link_speed != UINT32_MAX)) {
+               bytes_per_hz = task->port->max_link_speed * 125000L;
+               plog_info("\t\tPort %u: max link speed is %ld Mbps\n",
+                       (uint8_t)(task->port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
+       }
+       // There are cases where hz estimate might be slightly over-estimated
+       // This results in too much extrapolation
+       // Only account for 99% of extrapolation to handle cases with up to 1% error clocks
+       for (unsigned int i = 0; i < task->max_frame_size * MAX_PKT_BURST ; i++) {
+               if (bytes_per_hz == UINT64_MAX)
+                       task->bytes_to_tsc[i] = 0;
+               else
+                       task->bytes_to_tsc[i] = (task->hz * i * 0.99) / bytes_per_hz;
+       }
 
+       task->imix_nb_pkts = targ->imix_nb_pkts;
+       for (uint32_t i = 0; i < targ->imix_nb_pkts; i++) {
+               task->imix_pkt_sizes[i] = targ->imix_pkt_sizes[i];
+       }
        if (!strcmp(targ->pcap_file, "")) {
-               plog_info("\tUsing inline definition of a packet\n");
+               plog_info("\t\tUsing inline definition of a packet\n");
                task_init_gen_load_pkt_inline(task, targ);
        } else {
-               plog_info("Loading from pcap %s\n", targ->pcap_file);
+               plog_info("\t\tLoading from pcap %s\n", targ->pcap_file);
                task_init_gen_load_pcap(task, targ);
        }
 
-       if ((targ->flags & DSF_KEEP_SRC_MAC) == 0 && (targ->nb_txrings || targ->nb_txports)) {
-               uint8_t *src_addr = prox_port_cfg[tbase->tx_params_hw.tx_port_queue->port].eth_addr.addr_bytes;
-               for (uint32_t i = 0; i < task->n_pkts; ++i) {
-                       rte_memcpy(&task->pkt_template[i].buf[6], src_addr, 6);
-               }
-       }
-       memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(struct ether_addr));
-       if (!strcmp(targ->task_init->sub_mode_str, "l3")) {
-               // In L3 GEN, we need to receive ARP replies
-               task->flags = FLAG_L3_GEN;
-               task->gw_ip = rte_cpu_to_be_32(targ->gateway_ipv4);
-               uint32_t n_entries;
-
-               if (targ->number_gen_ip == 0)
-                       n_entries = 1048576;
-               else
-                       n_entries = targ->number_gen_ip;
-
-               static char hash_name[30];
-               sprintf(hash_name, "A%03d_mac_table", targ->lconf->id);
-
-               struct rte_hash_parameters hash_params = {
-                       .name = hash_name,
-                       .entries = n_entries,
-                       .key_len = sizeof(uint32_t),
-                       .hash_func = rte_hash_crc,
-                       .hash_func_init_val = 0,
-               };
-               task->mac_hash = rte_hash_create(&hash_params);
-               PROX_PANIC(task->mac_hash == NULL, "Failed to set up mac hash table for %d IP\n", n_entries);
-
-               const uint32_t socket = rte_lcore_to_socket_id(targ->lconf->id);
-               task->dst_mac = (uint64_t *)prox_zmalloc(n_entries * sizeof(uint64_t), socket);
-               PROX_PANIC(task->dst_mac == NULL, "Failed to allocate mac table for %d IP\n", n_entries);
-
+       PROX_PANIC(((targ->nb_txrings == 0) && (targ->nb_txports == 0)), "Gen mode requires a tx ring or a tx port");
+       if ((targ->flags & DSF_KEEP_SRC_MAC) == 0) {
+               task->flags |= TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC;
+               memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(prox_rte_ether_addr));
                for (uint32_t i = 0; i < task->n_pkts; ++i) {
-                       // For all destination IP, ARP request will need to be sent
-                       // Store position of Destination IP in template
-                       int ip_dst_pos = 0;
-                       int maybe_ipv4 = 0;
-                       int l2_len = sizeof(struct ether_hdr);
-                       struct vlan_hdr *vlan_hdr;
-                       uint8_t *pkt = task->pkt_template[i].buf;
-                       struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
-                       struct ipv4_hdr *ip;
-                       uint16_t ether_type = eth_hdr->ether_type;
-
-                       // Unstack VLAN tags
-                       while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(struct vlan_hdr) < task->pkt_template[i].len)) {
-                               vlan_hdr = (struct vlan_hdr *)(pkt + l2_len);
-                               l2_len +=4;
-                               ether_type = vlan_hdr->eth_proto;
-                       }
-                       if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
-                               l2_len +=4;
-                               maybe_ipv4 = 1;
-                       }
-                       if ((ether_type == ETYPE_IPv4) || maybe_ipv4) {
-                               struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + l2_len);
-                               PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
-                               // Even if IPv4 header contains options, options are after ip src and dst
-                               ip_dst_pos = l2_len + sizeof(struct ipv4_hdr) - sizeof(uint32_t);
-                               uint32_t *p = ((uint32_t *)(task->pkt_template[i].buf + ip_dst_pos - sizeof(uint32_t)));
-                               task->pkt_template[i].ip_dst_pos = ip_dst_pos;
-                               task->pkt_template[i].ip_src = *p;
-                               uint32_t *p1 = ((uint32_t *)(task->pkt_template[i].buf + ip_dst_pos));
-                               plog_info("\tip_dst_pos = %d, ip_dst = %x\n", ip_dst_pos, *p1);
-                       }
+                       rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
                }
-               task->src_ip = rte_cpu_to_be_32(targ->local_ipv4);
        }
        for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
                PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
                           "Failed to add random\n");
        }
-
-       struct prox_port_cfg *port = find_reachable_port(targ);
-       if (port) {
-               task->cksum_offload = port->capabilities.tx_offload_cksum;
-       }
 }
 
 static struct task_init task_init_gen = {
@@ -1433,10 +1616,11 @@ static struct task_init task_init_gen = {
        .init = init_task_gen,
        .handle = handle_gen_bulk,
        .start = start,
+       .early_init = init_task_gen_early,
 #ifdef SOFT_CRC
        // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
        // vector mode is used by DPDK, resulting (theoretically) in higher performance.
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
        .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
@@ -1449,24 +1633,27 @@ static struct task_init task_init_gen_l3 = {
        .init = init_task_gen,
        .handle = handle_gen_bulk,
        .start = start,
+       .early_init = init_task_gen_early,
 #ifdef SOFT_CRC
        // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
        // vector mode is used by DPDK, resulting (theoretically) in higher performance.
-       .flag_features = TASK_FEATURE_ZERO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_ZERO_RX,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
-       .flag_features = TASK_FEATURE_ZERO_RX,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
        .size = sizeof(struct task_gen)
 };
 
+/* This mode uses time stamps in the pcap file */
 static struct task_init task_init_gen_pcap = {
        .mode_str = "gen",
        .sub_mode_str = "pcap",
        .init = init_task_gen_pcap,
        .handle = handle_gen_pcap_bulk,
        .start = start_pcap,
+       .early_init = init_task_gen_early,
 #ifdef SOFT_CRC
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
        .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif