diff --git a/VNFs/DPPD-PROX/handle_gen.c b/VNFs/DPPD-PROX/handle_gen.c
index e5e43fca..6a517c3f 100644
--- a/VNFs/DPPD-PROX/handle_gen.c
+++ b/VNFs/DPPD-PROX/handle_gen.c
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2010-2017 Intel Corporation
+// Copyright (c) 2010-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 */
-
 #include 
 #include 
 #include 
@@ -22,6 +21,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "prox_shared.h"
 #include "random.h"
@@ -45,27 +46,26 @@
 #include "local_mbuf.h"
 #include "arp.h"
 #include "tx_pkt.h"
-#include 
+#include "handle_master.h"
+#include "defines.h"
+#include "prox_ipv6.h"
 
 struct pkt_template {
-	uint64_t dst_mac;
-	uint32_t ip_src;
-	uint32_t ip_dst_pos;
 	uint16_t len;
 	uint16_t l2_len;
 	uint16_t l3_len;
-	uint8_t buf[ETHER_MAX_LEN];
+	uint8_t *buf;
 };
 
-#define FLAG_DST_MAC_KNOWN	1
-#define FLAG_L3_GEN		2
-#define FLAG_RANDOM_IPS		4
+#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
 
-#define MAX_TEMPLATE_INDEX	65536
-#define TEMPLATE_INDEX_MASK	(MAX_TEMPLATE_INDEX - 1)
-#define MBUF_ARP		MAX_TEMPLATE_INDEX
+#define DO_PANIC 1
+#define DO_NOT_PANIC 0
 
-#define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
+#define FROM_PCAP 1
+#define NOT_FROM_PCAP 0
+
+#define TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC 1
 
 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
 {
@@ -87,12 +87,12 @@ struct task_gen_pcap {
 	uint32_t n_pkts;
 	uint64_t last_tsc;
 	uint64_t *proto_tsc;
+	uint32_t socket_id;
 };
 
 struct task_gen {
 	struct task_base base;
 	uint64_t hz;
-	uint64_t link_speed;
 	struct token_time token_time;
 	struct local_mbuf local_mbuf;
 	struct pkt_template *pkt_template; /* packet templates used at runtime */
@@ -101,14 +101,17 @@ struct task_gen {
 	uint64_t new_rate_bps;
 	uint64_t pkt_queue_index;
 	uint32_t n_pkts; /* number of packets in pcap */
+	uint32_t orig_n_pkts; /* number of packets in pcap file (before imix duplication) */
 	uint32_t pkt_idx; /* current packet from pcap */
 	uint32_t pkt_count; /* how many packets to generate */
+	uint32_t max_frame_size;
 	uint32_t runtime_flags;
 	uint16_t lat_pos;
 	uint16_t packet_id_pos;
 	uint16_t accur_pos;
 	uint16_t sig_pos;
 	uint32_t sig;
+	uint32_t socket_id;
 	uint8_t generator_id;
 	uint8_t n_rands; /* number of randoms */
 	uint8_t min_bulk_size;
@@ -122,24 +125,30 @@ struct task_gen {
 		uint16_t rand_offset; /* each random has an offset */
 		uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
 	} rand[64];
-	uint64_t accur[64];
+	uint64_t accur[ACCURACY_WINDOW];
 	uint64_t pkt_tsc_offset[64];
 	struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
-	struct ether_addr gw_mac;
-	struct ether_addr src_mac;
-	struct rte_hash *mac_hash;
-	uint64_t *dst_mac;
-	uint32_t gw_ip;
-	uint32_t src_ip;
+	prox_rte_ether_addr src_mac;
 	uint8_t flags;
 	uint8_t cksum_offload;
+	struct prox_port_cfg *port;
+	uint64_t *bytes_to_tsc;
+	uint32_t imix_pkt_sizes[MAX_IMIX_PKTS];
+	uint32_t imix_nb_pkts;
+	uint32_t new_imix_nb_pkts;
 } __rte_cache_aligned;
 
-static 
inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip) +static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes); +static void task_gen_reset_pkt_templates_content(struct task_gen *task); +static void task_gen_pkt_template_recalc_metadata(struct task_gen *task); +static int check_all_pkt_size(struct task_gen *task, int do_panic); +static int check_all_fields_in_bounds(struct task_gen *task, int do_panic); + +static inline uint8_t ipv4_get_hdr_len(prox_rte_ipv4_hdr *ip) { /* Optimize for common case of IPv4 header without options. */ if (ip->version_ihl == 0x45) - return sizeof(struct ipv4_hdr); + return sizeof(prox_rte_ipv4_hdr); if (unlikely(ip->version_ihl >> 4 != 4)) { plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4); return 0; @@ -149,16 +158,16 @@ static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip) static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len) { - *l2_len = sizeof(struct ether_hdr); + *l2_len = sizeof(prox_rte_ether_hdr); *l3_len = 0; - struct vlan_hdr *vlan_hdr; - struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt; - struct ipv4_hdr *ip; + prox_rte_vlan_hdr *vlan_hdr; + prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt; + prox_rte_ipv4_hdr *ip; uint16_t ether_type = eth_hdr->ether_type; // Unstack VLAN tags - while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(struct vlan_hdr) < len)) { - vlan_hdr = (struct vlan_hdr *)(pkt + *l2_len); + while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(prox_rte_vlan_hdr) < len)) { + vlan_hdr = (prox_rte_vlan_hdr *)(pkt + *l2_len); *l2_len +=4; ether_type = vlan_hdr->eth_proto; } @@ -171,11 +180,11 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui case ETYPE_MPLSM: *l2_len +=4; break; + case ETYPE_IPv6: case ETYPE_IPv4: break; case ETYPE_EoGRE: case ETYPE_ARP: - case ETYPE_IPv6: *l2_len = 0; break; default: @@ -185,8 +194,9 @@ static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, ui } if (*l2_len) { - struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + *l2_len); - *l3_len = ipv4_get_hdr_len(ip); + prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len); + if (ip->version_ihl >> 4 == 4) + *l3_len = ipv4_get_hdr_len(ip); } } @@ -195,9 +205,20 @@ static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_temp uint16_t l2_len = pkt_template->l2_len; uint16_t l3_len = pkt_template->l3_len; - if (l2_len) { - struct ipv4_hdr *ip = (struct ipv4_hdr*)(hdr + l2_len); + prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len); + if (l3_len) { prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload); + } else if (ip->version_ihl >> 4 == 6) { + prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(hdr + l2_len); + if (ip6->proto == IPPROTO_UDP) { + prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1); + udp->dgram_cksum = 0; + udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp); + } else if (ip6->proto == IPPROTO_TCP) { + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1); + tcp->cksum = 0; + tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp); + } } } @@ -207,22 +228,6 @@ static void task_gen_reset_token_time(struct task_gen *task) token_time_reset(&task->token_time, rte_rdtsc(), 0); } -static void start(struct task_base *tbase) -{ - struct task_gen *task = (struct task_gen *)tbase; - task->pkt_queue_index = 0; - - task_gen_reset_token_time(task); -} - -static void start_pcap(struct task_base *tbase) -{ - 
struct task_gen_pcap *task = (struct task_gen_pcap *)tbase; - /* When we start, the first packet is sent immediately. */ - task->last_tsc = rte_rdtsc() - task->proto_tsc[0]; - task->pkt_idx = 0; -} - static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk) { if (task->pkt_count == (uint32_t)-1) @@ -287,20 +292,14 @@ static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf, return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL); } -static uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes) +static inline uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes) { - const uint64_t hz = task->hz; - const uint64_t bytes_per_hz = task->link_speed; - - if (bytes_per_hz == UINT64_MAX) - return 0; - - return hz * bytes / bytes_per_hz; + return task->bytes_to_tsc[bytes]; } static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx) { - return pkt_idx + 1 == task->n_pkts? 0 : pkt_idx + 1; + return pkt_idx + 1 >= task->n_pkts? 0 : pkt_idx + 1; } static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset) @@ -337,13 +336,7 @@ static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *t */ for (uint16_t j = 0; j < max_bulk; ++j) { struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp]; - if (unlikely((task->flags & (FLAG_L3_GEN | FLAG_DST_MAC_KNOWN)) == FLAG_L3_GEN)) { - // Generator is supposed to get MAC address - MAC is still unknown for this template - // generate ARP Request to gateway instead of the intended packet - pkt_size = 60; - } else { - pkt_size = pktpl->len; - } + pkt_size = pktpl->len; uint32_t pkt_len = pkt_len_to_wire_size(pkt_size); if (pkt_len + would_send_bytes > task->token_time.bytes_now) break; @@ -360,106 +353,6 @@ static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *t return send_bulk; } -static inline void create_arp(struct rte_mbuf *mbuf, uint8_t *pkt_hdr, uint64_t *src_mac, uint32_t ip_dst, uint32_t ip_src) -{ - uint64_t mac_bcast = 0xFFFFFFFFFFFF; - rte_pktmbuf_pkt_len(mbuf) = 42; - rte_pktmbuf_data_len(mbuf) = 42; - init_mbuf_seg(mbuf); - struct ether_hdr_arp *hdr_arp = (struct ether_hdr_arp *)pkt_hdr; - - memcpy(&hdr_arp->ether_hdr.d_addr.addr_bytes, &mac_bcast, 6); - memcpy(&hdr_arp->ether_hdr.s_addr.addr_bytes, src_mac, 6); - hdr_arp->ether_hdr.ether_type = ETYPE_ARP; - hdr_arp->arp.htype = 0x100, - hdr_arp->arp.ptype = 0x0008; - hdr_arp->arp.hlen = 6; - hdr_arp->arp.plen = 4; - hdr_arp->arp.oper = 0x100; - hdr_arp->arp.data.spa = ip_src; - hdr_arp->arp.data.tpa = ip_dst; - memset(&hdr_arp->arp.data.tha, 0, sizeof(struct ether_addr)); - memcpy(&hdr_arp->arp.data.sha, src_mac, sizeof(struct ether_addr)); -} - -static int task_gen_write_dst_mac(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count) -{ - uint32_t ip_dst_pos, ip_src_pos, ip_dst, ip_src; - uint16_t i; - int ret; - - if (task->flags & FLAG_L3_GEN) { - if (task->gw_ip) { - if (unlikely((task->flags & FLAG_DST_MAC_KNOWN) == 0)) { - for (i = 0; i < count; ++i) { - struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK]; - create_arp(mbufs[i], pkt_hdr[i], (uint64_t *)&pktpl->buf[6], task->gw_ip, pktpl->ip_src); - mbufs[i]->udata64 |= MBUF_ARP; - } - } else { - for (i = 0; i < count; ++i) { - struct ether_hdr *hdr = (struct ether_hdr *)pkt_hdr[i]; - memcpy(&hdr->d_addr.addr_bytes, &task->gw_mac, 6); - } - } - } else if (unlikely((task->flags & FLAG_RANDOM_IPS) != 0) || (task->n_pkts >= 
4)){ - // Find mac in lookup table. Send ARP if not found - int32_t positions[MAX_PKT_BURST], idx; - void *keys[MAX_PKT_BURST]; - uint32_t key[MAX_PKT_BURST]; - for (i = 0; i < count; ++i) { - uint8_t *hdr = (uint8_t *)pkt_hdr[i]; - struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK]; - ip_dst_pos = pktpl->ip_dst_pos; - ip_dst = *(uint32_t *)(hdr + ip_dst_pos); - key[i] = ip_dst; - keys[i] = &key[i]; - } - ret = rte_hash_lookup_bulk(task->mac_hash, (const void **)&keys, count, positions); - if (unlikely(ret < 0)) { - plogx_err("lookup_bulk failed in mac_hash\n"); - tx_pkt_drop_all((struct task_base *)task, mbufs, count, NULL); - return -1; - } - for (i = 0; i < count; ++i) { - idx = positions[i]; - if (unlikely(idx < 0)) { - // mac not found for this IP - struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK]; - uint8_t *hdr = (uint8_t *)pkt_hdr[i]; - ip_src_pos = pktpl->ip_dst_pos - 4; - ip_src = *(uint32_t *)(hdr + ip_src_pos); - create_arp(mbufs[i], pkt_hdr[i], (uint64_t *)&hdr[6], key[i], ip_src); - mbufs[i]->udata64 |= MBUF_ARP; - } else { - // mac found for this IP - struct ether_hdr_arp *hdr_arp = (struct ether_hdr_arp *)pkt_hdr[i]; - memcpy(&hdr_arp->ether_hdr.d_addr.addr_bytes, &task->dst_mac[idx], 6); - } - } - } else { - for (i = 0; i < count; ++i) { - uint8_t *hdr = (uint8_t *)pkt_hdr[i]; - struct pkt_template *pktpl = &task->pkt_template[mbufs[i]->udata64 & TEMPLATE_INDEX_MASK]; - - // Check if packet template already has the mac - if (unlikely(pktpl->dst_mac == 0)) { - // no random_ip, can take from from packet template but no mac (yet) - uint32_t ip_dst_pos = pktpl->ip_dst_pos; - ip_dst = *(uint32_t *)(hdr + ip_dst_pos); - create_arp(mbufs[i], pkt_hdr[i], (uint64_t *)&pktpl->buf[6], ip_dst, pktpl->ip_src); - mbufs[i]->udata64 |= MBUF_ARP; - } else { - // no random ip, mac known - struct ether_hdr_arp *hdr_arp = (struct ether_hdr_arp *)pkt_hdr[i]; - memcpy(&hdr_arp->ether_hdr.d_addr.addr_bytes, &pktpl->dst_mac, 6); - } - } - } - } - return 0; -} - static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr) { uint32_t ret, ret_tmp; @@ -491,9 +384,10 @@ static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, ui *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy; } -static void task_gen_apply_sig(struct task_gen *task, uint8_t *pkt_hdr) +static void task_gen_apply_sig(struct task_gen *task, struct pkt_template *dst) { - *(uint32_t *)(pkt_hdr + task->sig_pos) = task->sig; + if (task->sig_pos) + *(uint32_t *)(dst->buf + task->sig_pos) = task->sig; } static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count) @@ -501,26 +395,12 @@ static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf if (!task->accur_pos) return; - /* The accuracy of task->pkt_queue_index - 64 is stored in - packet task->pkt_queue_index. The ID modulo 64 is the + /* The accuracy of task->pkt_queue_index - ACCURACY_WINDOW is stored in + packet task->pkt_queue_index. The ID modulo ACCURACY_WINDOW is the same. 
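+	   In other words, accur[] is a ring buffer of ACCURACY_WINDOW
+	   entries indexed by packet id modulo ACCURACY_WINDOW; the mask
+	   (ACCURACY_WINDOW - 1) used below relies on ACCURACY_WINDOW
+	   being a power of two.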
*/ for (uint16_t j = 0; j < count; ++j) { - if ((mbufs[j]->udata64 & MBUF_ARP) == 0) { - uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & 63]; - task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy); - } - } -} - -static void task_gen_apply_all_sig(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count) -{ - if (!task->sig_pos) - return; - - for (uint16_t j = 0; j < count; ++j) { - if ((mbufs[j]->udata64 & MBUF_ARP) == 0) { - task_gen_apply_sig(task, pkt_hdr[j]); - } + uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & (ACCURACY_WINDOW - 1)]; + task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy); } } @@ -537,11 +417,9 @@ static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf return; for (uint16_t i = 0; i < count; ++i) { - if ((mbufs[i]->udata64 & MBUF_ARP) == 0) { - struct unique_id id; - unique_id_init(&id, task->generator_id, task->pkt_queue_index++); - task_gen_apply_unique_id(task, pkt_hdr[i], &id); - } + struct unique_id id; + unique_id_init(&id, task->generator_id, task->pkt_queue_index++); + task_gen_apply_unique_id(task, pkt_hdr[i], &id); } } @@ -555,11 +433,9 @@ static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **m uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count); for (uint16_t i = 0; i < count; ++i) { - if ((mbufs[i]->udata64 & MBUF_ARP) == 0) { - struct pkt_template *pkt_template = &task->pkt_template[pkt_idx]; - checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload); - pkt_idx = task_gen_next_pkt_idx(task, pkt_idx); - } + struct pkt_template *pkt_template = &task->pkt_template[pkt_idx]; + checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload); + pkt_idx = task_gen_next_pkt_idx(task, pkt_idx); } } @@ -579,8 +455,12 @@ static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t coun uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1); struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx]; uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len); +#ifdef NO_EXTRAPOLATION + uint64_t bulk_duration = task->pkt_tsc_offset[count - 1]; +#else uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len); uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration; +#endif return bulk_duration; } @@ -615,6 +495,14 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr, simply sleeping until delta_t is zero would leave a period of silence on the line. The error has been introduced earlier, but the packets have already been sent. */ + + /* This happens typically if previous bulk was delayed + by an interrupt e.g. 
(with Time in nsec)
+	   Time x: sleep 4 microsec
+	   Time x+4000: send 64 packets (64 packets take ~4000 nsec at 10 Gbps with 64-byte frames)
+	   Time x+5000: send 16 packets (16 packets take ~1000 nsec)
+	   When we send the 16 packets, the 64 earlier packets are not yet
+	   fully sent */
 	if (tx_tsc < task->earliest_tsc_next_pkt)
 		delta_t = task->earliest_tsc_next_pkt - tx_tsc;
 	else
@@ -623,12 +511,10 @@ static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr,
 	for (uint16_t i = 0; i < count; ++i) {
 		uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
 		const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
-
 		*pos = pkt_tsc >> LATENCY_ACCURACY;
 	}
 
 	uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
-
 	task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
 	write_tsc_after = rte_rdtsc();
 	task->write_duration_estimate = write_tsc_after - write_tsc_before;
@@ -638,6 +524,7 @@
 	do {
 		tsc_before_tx = rte_rdtsc();
 	} while (tsc_before_tx < tx_tsc);
+
 	return tsc_before_tx;
 }
 
@@ -650,7 +537,7 @@ static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint6
 	uint64_t first_accuracy_idx = task->pkt_queue_index - count;
 
 	for (uint32_t i = 0; i < count; ++i) {
-		uint32_t accuracy_idx = (first_accuracy_idx + i) & 63;
+		uint32_t accuracy_idx = (first_accuracy_idx + i) & (ACCURACY_WINDOW - 1);
 
 		task->accur[accuracy_idx] = accur;
 	}
@@ -674,108 +561,361 @@ static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbuf
 		struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
 		struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
 		pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
-		mbufs[i]->udata64 = task->pkt_idx & TEMPLATE_INDEX_MASK;
-		struct ether_hdr *hdr = (struct ether_hdr *)pkt_hdr[i];
+		prox_rte_ether_hdr *hdr = (prox_rte_ether_hdr *)pkt_hdr[i];
 		if (task->lat_enabled) {
+#ifdef NO_EXTRAPOLATION
+			task->pkt_tsc_offset[i] = 0;
+#else
 			task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
+#endif
 			will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
 		}
 		task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
 	}
 }
 
+static int task_gen_allocate_templates(struct task_gen *task, uint32_t orig_nb_pkts, uint32_t nb_pkts, int do_panic, int pcap)
+{
+	size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
+	size_t orig_mem_size = orig_nb_pkts * sizeof(*task->pkt_template);
+	task->pkt_template = prox_zmalloc(mem_size, task->socket_id);
+	task->pkt_template_orig = prox_zmalloc(orig_mem_size, task->socket_id);
+
+	if (task->pkt_template == NULL || task->pkt_template_orig == NULL) {
+		plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for %s\n", mem_size, pcap ? "pcap file":"packet template");
+		return -1;
+	}
+
+	for (size_t i = 0; i < orig_nb_pkts; i++) {
+		task->pkt_template_orig[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+		if (task->pkt_template_orig[i].buf == NULL) {
+			plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
+			return -1;
+		}
+	}
+	for (size_t i = 0; i < nb_pkts; i++) {
+		task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
+		if (task->pkt_template[i].buf == NULL) {
+			plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? 
"packet from pcap": "packet"); + return -1; + } + } + return 0; +} + +static int task_gen_reallocate_templates(struct task_gen *task, uint32_t nb_pkts, int do_panic) +{ + // Need to free up bufs allocated in previous (longer) imix + for (size_t i = nb_pkts; i < task->n_pkts; i++) { + if (task->pkt_template[i].buf) { + rte_free(task->pkt_template[i].buf); + task->pkt_template[i].buf = NULL; + } + } + + size_t mem_size = nb_pkts * sizeof(*task->pkt_template); + size_t old_mem_size = task->n_pkts * sizeof(*task->pkt_template); + if (old_mem_size > mem_size) + old_mem_size = mem_size; + + struct pkt_template *ptr; + + // re-allocate memory for new pkt_template (this might allocate additional memory or free up some...) + if ((ptr = rte_malloc_socket(NULL, mem_size, RTE_CACHE_LINE_SIZE, task->socket_id)) != NULL) { + memcpy(ptr, task->pkt_template, old_mem_size); + rte_free(task->pkt_template); + task->pkt_template = ptr; + } else { + plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for packet template for IMIX\n", mem_size); + return -1; + } + + // Need to allocate bufs for new template but no need to reallocate for existing ones + for (size_t i = task->n_pkts; i < nb_pkts; ++i) { + task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id); + if (task->pkt_template[i].buf == NULL) { + plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for packet %zd in IMIX\n", task->max_frame_size, i); + return -1; + } + } + return 0; +} + +static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic) +{ + const uint16_t min_len = sizeof(prox_rte_ether_hdr) + sizeof(prox_rte_ipv4_hdr); + const uint16_t max_len = task->max_frame_size; + + if (do_panic) { + PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n"); + PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len); + PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len); + return 0; + } else { + if (pkt_size == 0) { + plog_err("Invalid packet size length (no packet defined?)\n"); + return -1; + } + if (pkt_size > max_len) { + if (pkt_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE - 4) + plog_err("pkt_size too high and jumbo frames disabled\n"); + else + plog_err("pkt_size out of range (must be <= (mtu=%u))\n", max_len); + return -1; + } + if (pkt_size < min_len) { + plog_err("pkt_size out of range (must be >= %u)\n", min_len); + return -1; + } + return 0; + } +} + +static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic) +{ + if (task->lat_enabled) { + uint32_t pos_beg = task->lat_pos; + uint32_t pos_end = task->lat_pos + 3U; + + if (do_panic) + PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n", + pos_beg, pos_end, pkt_size); + else if (pkt_size <= pos_end) { + plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size); + return -1; + } + } + if (task->packet_id_pos) { + uint32_t pos_beg = task->packet_id_pos; + uint32_t pos_end = task->packet_id_pos + 4U; + + if (do_panic) + PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n", + pos_beg, pos_end, pkt_size); + else if (pkt_size <= pos_end) { + plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size); + return -1; + } + } + if (task->accur_pos) { + uint32_t pos_beg = task->accur_pos; + uint32_t pos_end = task->accur_pos + 3U; + + if (do_panic) + 
PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u-%u, but packet size is %u bytes\n", + pos_beg, pos_end, pkt_size); + else if (pkt_size <= pos_end) { + plog_err("Writing accuracy at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size); + return -1; + } + } + return 0; +} + +static int task_gen_set_eth_ip_udp_sizes(struct task_gen *task, uint32_t n_orig_pkts, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes) +{ + size_t k; + uint32_t l4_len; + prox_rte_ipv4_hdr *ip; + struct pkt_template *template; + + for (size_t j = 0; j < nb_pkt_sizes; ++j) { + for (size_t i = 0; i < n_orig_pkts; ++i) { + k = j * n_orig_pkts + i; + template = &task->pkt_template[k]; + if (template->l2_len == 0) + continue; + ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len); + ip->total_length = rte_bswap16(pkt_sizes[j] - template->l2_len); + l4_len = pkt_sizes[j] - template->l2_len - template->l3_len; + ip->hdr_checksum = 0; + prox_ip_cksum_sw(ip); + + if (ip->next_proto_id == IPPROTO_UDP) { + prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len); + udp->dgram_len = rte_bswap16(l4_len); + prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr); + } else if (ip->next_proto_id == IPPROTO_TCP) { + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len); + prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr); + } + } + } + return 0; +} + +static int task_gen_apply_imix(struct task_gen *task, int do_panic) +{ + struct pkt_template *ptr; + int rc; + task->imix_nb_pkts = task->new_imix_nb_pkts; + uint32_t n_pkts = task->imix_nb_pkts * task->orig_n_pkts; + + if ((n_pkts != task->n_pkts) && ((rc = task_gen_reallocate_templates(task, n_pkts, do_panic)) < 0)) + return rc; + + task->n_pkts = n_pkts; + if (task->pkt_idx >= n_pkts) + task->pkt_idx = 0; + task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes); + task_gen_reset_pkt_templates_content(task); + task_gen_pkt_template_recalc_metadata(task); + check_all_pkt_size(task, DO_NOT_PANIC); + check_all_fields_in_bounds(task, DO_NOT_PANIC); + task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes); + return 0; +} + static void task_gen_update_config(struct task_gen *task) { if (task->token_time.cfg.bpp != task->new_rate_bps) task_gen_reset_token_time(task); + if (task->new_imix_nb_pkts) + task_gen_apply_imix(task, DO_NOT_PANIC); + task->new_imix_nb_pkts = 0; } -static inline void handle_arp_pkts(struct task_gen *task, struct rte_mbuf **mbufs, uint16_t n_pkts) +static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits) { - int j; - int ret; - struct ether_hdr_arp *hdr; - uint8_t out[MAX_PKT_BURST]; - static struct my_arp_t arp_reply = { - .htype = 0x100, - .ptype = 8, - .hlen = 6, - .plen = 4, - .oper = 0x200 - }; - static struct my_arp_t arp_request = { - .htype = 0x100, - .ptype = 8, - .hlen = 6, - .plen = 4, - .oper = 0x100 - }; - - for (j = 0; j < n_pkts; ++j) { - PREFETCH0(mbufs[j]); - } - for (j = 0; j < n_pkts; ++j) { - PREFETCH0(rte_pktmbuf_mtod(mbufs[j], void *)); - } - for (j = 0; j < n_pkts; ++j) { - hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *); - if (hdr->ether_hdr.ether_type == ETYPE_ARP) { - if (memcmp(&hdr->arp, &arp_reply, 8) == 0) { - uint32_t ip = hdr->arp.data.spa; - // plog_info("Received ARP Reply for IP %x\n",ip); - if (ip == task->gw_ip) { - memcpy(&task->gw_mac, &hdr->arp.data.sha, 6);; - task->flags |= FLAG_DST_MAC_KNOWN; - out[j] = OUT_HANDLED; - continue; - } 
else if ((task->n_pkts >= 4) || (task->flags & FLAG_RANDOM_IPS)) { - // Ideally, we should add the key when making the arp request, - // We should only store the mac address key was created. - // Here we are storing MAC we did not asked for... - ret = rte_hash_add_key(task->mac_hash, (const void *)&ip); - if (ret < 0) { - plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip)); - out[j] = OUT_DISCARD; - } else { - task->dst_mac[ret] = *(uint64_t *)&(hdr->arp.data.sha); - out[j] = OUT_HANDLED; - } + struct task_base *tbase = (struct task_base *)task; + if (bit_pos < 32) { + build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits); + if (mask & 1) { + build_value(task, mask >> 1, bit_pos + 1, val | (1 << bit_pos), fixed_bits); + } + } else { + register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + } +} + +static inline void build_value_ipv6(struct task_gen *task, uint32_t mask, int var_bit_pos, int init_var_bit_pos, struct ipv6_addr val, struct ipv6_addr fixed_bits) +{ + struct task_base *tbase = (struct task_base *)task; + if (var_bit_pos < 32) { + build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits); + if (mask & 1) { + int byte_pos = (var_bit_pos + init_var_bit_pos) / 8; + int bit_pos = (var_bit_pos + init_var_bit_pos) % 8; + val.bytes[byte_pos] = val.bytes[byte_pos] | (1 << bit_pos); + build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits); + } + } else { + for (uint i = 0; i < sizeof(struct ipv6_addr) / 8; i++) + val.bytes[i] = val.bytes[i] | fixed_bits.bytes[i]; + register_node_to_ctrl_plane(tbase->l3.tmaster, &null_addr, &val, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + } +} + +static inline void register_all_ip_to_ctrl_plane(struct task_gen *task) +{ + struct task_base *tbase = (struct task_base *)task; + int i, len, fixed; + unsigned int offset; + uint32_t mask, ip_len; + struct ipv6_addr *ip6_src = NULL; + uint32_t *ip_src; + + for (uint32_t i = 0; i < task->n_pkts; ++i) { + struct pkt_template *pktpl = &task->pkt_template[i]; + unsigned int ip_src_pos = 0; + int ipv4 = 0; + unsigned int l2_len = sizeof(prox_rte_ether_hdr); + + uint8_t *pkt = pktpl->buf; + prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt; + uint16_t ether_type = eth_hdr->ether_type; + prox_rte_vlan_hdr *vlan_hdr; + prox_rte_ipv4_hdr *ip; + + // Unstack VLAN tags + while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) { + vlan_hdr = (prox_rte_vlan_hdr *)(pkt + l2_len); + l2_len +=4; + ether_type = vlan_hdr->eth_proto; + } + if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) { + l2_len +=4; + ip = (prox_rte_ipv4_hdr *)(pkt + l2_len); + if (ip->version_ihl >> 4 == 4) + ipv4 = 1; + else if (ip->version_ihl >> 4 != 6) // Version field at same location for IPv4 and IPv6 + continue; + } else if (ether_type == ETYPE_IPv4) { + ip = (prox_rte_ipv4_hdr *)(pkt + l2_len); + PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4); // Invalid Packet + ipv4 = 1; + } else if (ether_type == ETYPE_IPv6) { + ip = (prox_rte_ipv4_hdr *)(pkt + l2_len); + PROX_PANIC(ip->version_ihl >> 4 != 6, "IPv6 ether_type but IP version = %d != 6", ip->version_ihl >> 4); // Invalid Packet + } else { + continue; + } + + PROX_PANIC(ipv4 && ((prox_cfg.flags & DSF_L3_ENABLED) == 0), "Trying to generate an IPv4 packet in NDP mode => 
not supported\n"); + PROX_PANIC((ipv4 == 0) && ((prox_cfg.flags & DSF_NDP_ENABLED) == 0), "Trying to generate an IPv6 packet in L3 (IPv4) mode => not supported\n"); + if (ipv4) { + // Even if IPv4 header contains options, options are after ip src and dst + ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t); + ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos)); + plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src); + register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + ip_len = sizeof(uint32_t); + } else { + ip_src_pos = l2_len + sizeof(prox_rte_ipv6_hdr) - 2 * sizeof(struct ipv6_addr); + ip6_src = ((struct ipv6_addr *)(pktpl->buf + ip_src_pos)); + plog_info("\tip_src_pos = %d, ip6_src = "IPv6_BYTES_FMT"\n", ip_src_pos, IPv6_BYTES(ip6_src->bytes)); + register_node_to_ctrl_plane(tbase->l3.tmaster, ip6_src, &null_addr, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id); + ip_len = sizeof(struct ipv6_addr); + } + + for (int j = 0; j < task->n_rands; j++) { + offset = task->rand[j].rand_offset; + len = task->rand[j].rand_len; + mask = task->rand[j].rand_mask; + fixed = task->rand[j].fixed_bits; + plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed); + if (offset >= ip_src_pos + ip_len) // First random bit after IP + continue; + if (offset + len < ip_src_pos) // Last random bit before IP + continue; + + if (ipv4) { + if (offset >= ip_src_pos) { + int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1; + mask = mask & ip_src_mask; + fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask); + build_value(task, mask, 0, 0, fixed); + } else { + int32_t bits = ((ip_src_pos + 4 - offset - len) * 8); + mask = mask << bits; + fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1)); + build_value(task, mask, 0, 0, fixed); + } + } else { + // We do not support when random partially covers IP - either starting before or finishing after + if (offset + len >= ip_src_pos + ip_len) { // len over the ip + plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len); continue; } - // Need to find template back... 
- // Only try this if there are few templates - for (unsigned int idx = 0; idx < task->n_pkts; idx++) { - struct pkt_template *pktpl = &task->pkt_template[idx]; - uint32_t ip_dst_pos = pktpl->ip_dst_pos; - uint32_t *ip_dst = (uint32_t *)(((uint8_t *)pktpl->buf) + ip_dst_pos); - if (*ip_dst == ip) { - pktpl->dst_mac = *(uint64_t *)&(hdr->arp.data.sha); - } - out[j] = OUT_HANDLED; + if (offset < ip_src_pos) { + plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len); + continue; } - } else if (memcmp(&hdr->arp, &arp_request, 8) == 0) { - struct ether_addr s_addr; - if (!task->src_ip) { - create_mac(hdr, &s_addr); - prepare_arp_reply(hdr, &s_addr); - memcpy(hdr->ether_hdr.d_addr.addr_bytes, hdr->ether_hdr.s_addr.addr_bytes, 6); - memcpy(hdr->ether_hdr.s_addr.addr_bytes, &s_addr, 6); - out[j] = 0; - } else if (hdr->arp.data.tpa == task->src_ip) { - prepare_arp_reply(hdr, &task->src_mac); - memcpy(hdr->ether_hdr.d_addr.addr_bytes, hdr->ether_hdr.s_addr.addr_bytes, 6); - memcpy(hdr->ether_hdr.s_addr.addr_bytes, &task->src_mac, 6); - out[j] = 0; - } else { - out[j] = OUT_DISCARD; - plogx_dbg("Received ARP on unexpected IP %x, expecting %x\n", rte_be_to_cpu_32(hdr->arp.data.tpa), rte_be_to_cpu_32(task->src_ip)); + // Even for IPv6 the random mask supported by PROX are 32 bits only + struct ipv6_addr fixed_ipv6; + uint init_var_byte_pos = (offset - ip_src_pos); + for (uint i = 0; i < sizeof(struct ipv6_addr); i++) { + if (i < init_var_byte_pos) + fixed_ipv6.bytes[i] = ip6_src->bytes[i]; + else if (i < init_var_byte_pos + len) + fixed_ipv6.bytes[i] = (fixed >> (i - init_var_byte_pos)) & 0xFF; + else + fixed_ipv6.bytes[i] = ip6_src->bytes[i]; } + build_value_ipv6(task, mask, 0, init_var_byte_pos * 8, null_addr, fixed_ipv6); } - } else { - out[j] = OUT_DISCARD; } } - ret = task->base.tx_pkt(&task->base, mbufs, n_pkts, out); } static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts) @@ -786,10 +926,6 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin int i, j; - if (unlikely((task->flags & FLAG_L3_GEN) && (n_pkts != 0))) { - handle_arp_pkts(task, mbufs, n_pkts); - } - task_gen_update_config(task); if (task->pkt_count == 0) { @@ -802,7 +938,7 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin token_time_update(&task->token_time, rte_rdtsc()); uint32_t would_send_bytes; - const uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes); + uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes); if (send_bulk == 0) return 0; @@ -817,10 +953,7 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk); task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk); task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk); - if (task_gen_write_dst_mac(task, new_pkts, pkt_hdr, send_bulk) < 0) - return 0; task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk); - task_gen_apply_all_sig(task, new_pkts, pkt_hdr, send_bulk); task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk); uint64_t tsc_before_tx; @@ -829,6 +962,20 @@ static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uin task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk); ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out); task_gen_store_accuracy(task, send_bulk, tsc_before_tx); + + // If we failed to send some 
packets, we need to do some clean-up:
+
+	if (unlikely(ret)) {
+		// We need to re-use the packet indexes that were not sent
+		// Hence non-sent packets will not be considered as lost by the receiver when it looks at
+		// packet ids. This should also increase the percentage of packets used for latency measurements
+		task->pkt_queue_index -= ret;
+
+		// In case of failures, the estimate about when we can send next packet (earliest_tsc_next_pkt) is wrong
+		// This would result in under-estimated latency (up to 0 or negative)
+		uint64_t bulk_duration = task_gen_calc_bulk_duration(task, ret);
+		task->earliest_tsc_next_pkt -= bulk_duration;
+	}
 	return ret;
 }
 
@@ -838,14 +985,17 @@ static void init_task_gen_seeds(struct task_gen *task)
 		random_init_seed(&task->rand[i].state);
 }
 
-static uint32_t pcap_count_pkts(pcap_t *handle)
+static uint32_t pcap_count_pkts(pcap_t *handle, uint32_t *max_frame_size)
 {
 	struct pcap_pkthdr header;
 	const uint8_t *buf;
 	uint32_t ret = 0;
+	*max_frame_size = 0;
 	long pkt1_fpos = ftell(pcap_file(handle));
 
 	while ((buf = pcap_next(handle, &header))) {
+		if (header.len > *max_frame_size)
+			*max_frame_size = header.len;
 		ret++;
 	}
 	int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
@@ -862,7 +1012,7 @@ static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
 	return (tot_inter_pkt + n / 2)/n;
 }
 
-static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp)
+static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp, uint32_t max_frame_size)
 {
 	struct pcap_pkthdr header;
 	const uint8_t *buf;
 
@@ -873,7 +1023,7 @@
 		PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
 
 		proto[i].len = header.len;
-		len = RTE_MIN(header.len, sizeof(proto[i].buf));
+		len = RTE_MIN(header.len, max_frame_size);
 		if (header.len > len)
 			plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
 
@@ -905,33 +1055,6 @@
 	return 0;
 }
 
-static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
-{
-	const uint16_t min_len = sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr);
-	const uint16_t max_len = ETHER_MAX_LEN - 4;
-
-	if (do_panic) {
-		PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
-		PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
-		PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
-		return 0;
-	} else {
-		if (pkt_size == 0) {
-			plog_err("Invalid packet size length (no packet defined?)\n");
-			return -1;
-		}
-		if (pkt_size > max_len) {
-			plog_err("pkt_size out of range (must be <= %u)\n", max_len);
-			return -1;
-		}
-		if (pkt_size < min_len) {
-			plog_err("pkt_size out of range (must be >= %u)\n", min_len);
-			return -1;
-		}
-		return 0;
-	}
-}
-
 static int check_all_pkt_size(struct task_gen *task, int do_panic)
 {
 	int rc;
@@ -942,31 +1065,14 @@
 	return 0;
 }
 
-static void check_fields_in_bounds(struct task_gen *task)
+static int check_all_fields_in_bounds(struct task_gen *task, int do_panic)
 {
-	const uint32_t pkt_size = task->pkt_template[0].len;
-
-	if (task->lat_enabled) {
-		uint32_t pos_beg = task->lat_pos;
-		uint32_t pos_end = task->lat_pos + 3U;
-
-		PROX_PANIC(pkt_size <= pos_end, 
"Writing latency at %u-%u, but packet size is %u bytes\n", - pos_beg, pos_end, pkt_size); - } - if (task->packet_id_pos) { - uint32_t pos_beg = task->packet_id_pos; - uint32_t pos_end = task->packet_id_pos + 4U; - - PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n", - pos_beg, pos_end, pkt_size); - } - if (task->accur_pos) { - uint32_t pos_beg = task->accur_pos; - uint32_t pos_end = task->accur_pos + 3U; - - PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u%-u, but packet size is %u bytes\n", - pos_beg, pos_end, pkt_size); + int rc; + for (uint32_t i = 0; i < task->n_pkts;++i) { + if ((rc = check_fields_in_bounds(task, task->pkt_template[i].len, do_panic)) != 0) + return rc; } + return 0; } static void task_gen_pkt_template_recalc_metadata(struct task_gen *task) @@ -982,25 +1088,37 @@ static void task_gen_pkt_template_recalc_metadata(struct task_gen *task) static void task_gen_pkt_template_recalc_checksum(struct task_gen *task) { struct pkt_template *template; - struct ipv4_hdr *ip; + prox_rte_ipv4_hdr *ip; task->runtime_checksum_needed = 0; for (size_t i = 0; i < task->n_pkts; ++i) { template = &task->pkt_template[i]; if (template->l2_len == 0) continue; - ip = (struct ipv4_hdr *)(template->buf + template->l2_len); - - ip->hdr_checksum = 0; - prox_ip_cksum_sw(ip); - uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len; - - if (ip->next_proto_id == IPPROTO_UDP) { - struct udp_hdr *udp = (struct udp_hdr *)(((uint8_t *)ip) + template->l3_len); - prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr); - } else if (ip->next_proto_id == IPPROTO_TCP) { - struct tcp_hdr *tcp = (struct tcp_hdr *)(((uint8_t *)ip) + template->l3_len); - prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr); + ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len); + if (ip->version_ihl >> 4 == 4) { + ip->hdr_checksum = 0; + prox_ip_cksum_sw(ip); + uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len; + if (ip->next_proto_id == IPPROTO_UDP) { + prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len); + prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr); + } else if (ip->next_proto_id == IPPROTO_TCP) { + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len); + prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr); + } + } else if (ip->version_ihl >> 4 == 6) { + prox_rte_ipv6_hdr *ip6; + ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len); + if (ip6->proto == IPPROTO_UDP) { + prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1); + udp->dgram_cksum = 0; + udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp); + } else if (ip6->proto == IPPROTO_TCP) { + prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1); + tcp->cksum = 0; + tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp); + } } /* The current implementation avoids checksum @@ -1022,14 +1140,28 @@ static void task_gen_pkt_template_recalc_all(struct task_gen *task) task_gen_pkt_template_recalc_checksum(task); } +static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes) +{ + struct pkt_template *src, *dst; + + for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) { + for (size_t i = 0; i < task->orig_n_pkts; ++i) { + dst = &task->pkt_template[j * task->orig_n_pkts + i]; + dst->len = pkt_sizes[j]; + } + } +} + static void task_gen_reset_pkt_templates_len(struct task_gen *task) { struct pkt_template *src, *dst; - for (size_t i = 0; i < task->n_pkts; ++i) { - src = 
&task->pkt_template_orig[i]; - dst = &task->pkt_template[i]; - dst->len = src->len; + for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) { + for (size_t i = 0; i < task->orig_n_pkts; ++i) { + src = &task->pkt_template_orig[i]; + dst = &task->pkt_template[j * task->orig_n_pkts + i]; + dst->len = src->len; + } } } @@ -1037,82 +1169,105 @@ static void task_gen_reset_pkt_templates_content(struct task_gen *task) { struct pkt_template *src, *dst; - for (size_t i = 0; i < task->n_pkts; ++i) { - src = &task->pkt_template_orig[i]; - dst = &task->pkt_template[i]; - memcpy(dst->buf, src->buf, dst->len); + for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) { + for (size_t i = 0; i < task->orig_n_pkts; ++i) { + src = &task->pkt_template_orig[i]; + dst = &task->pkt_template[j * task->orig_n_pkts + i]; + memcpy(dst->buf, src->buf, RTE_MAX(src->len, dst->len)); + if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) { + rte_memcpy(&dst->buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr)); + } + task_gen_apply_sig(task, dst); + } } } static void task_gen_reset_pkt_templates(struct task_gen *task) { - task_gen_reset_pkt_templates_len(task); + if (task->imix_nb_pkts) + task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes); + else + task_gen_reset_pkt_templates_len(task); task_gen_reset_pkt_templates_content(task); task_gen_pkt_template_recalc_all(task); } static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ) { - const int socket_id = rte_lcore_to_socket_id(targ->lconf->id); - - if (targ->pkt_size > sizeof(task->pkt_template[0].buf)) - targ->pkt_size = sizeof(task->pkt_template[0].buf); - task->n_pkts = 1; - - size_t mem_size = task->n_pkts * sizeof(*task->pkt_template); - task->pkt_template = prox_zmalloc(mem_size, socket_id); - task->pkt_template_orig = prox_zmalloc(mem_size, socket_id); + int rc; - PROX_PANIC(task->pkt_template == NULL || - task->pkt_template_orig == NULL, - "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size); + task->orig_n_pkts = 1; + if (task->imix_nb_pkts == 0) { + task->n_pkts = 1; + task->imix_pkt_sizes[0] = targ->pkt_size; + } else { + task->n_pkts = task->imix_nb_pkts; + } + task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, NOT_FROM_PCAP); - rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, targ->pkt_size); - task->pkt_template_orig[0].len = targ->pkt_size; + rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, task->max_frame_size); + task->pkt_template_orig[0].len = task->imix_pkt_sizes[0]; task_gen_reset_pkt_templates(task); - check_all_pkt_size(task, 1); - check_fields_in_bounds(task); + check_all_pkt_size(task, DO_PANIC); + check_all_fields_in_bounds(task, DO_PANIC); + + // If IMIX was not specified then pkt_size is specified using pkt_size parameter or the length of pkt_inline + // In that case, for backward compatibility, we do NOT adapt the length of IP and UDP to the length of the packet + task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes); } static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ) { - const int socket_id = rte_lcore_to_socket_id(targ->lconf->id); char err[PCAP_ERRBUF_SIZE]; + uint32_t max_frame_size; pcap_t *handle = pcap_open_offline(targ->pcap_file, err); PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err); - task->n_pkts = pcap_count_pkts(handle); - plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, 
targ->pcap_file); + task->orig_n_pkts = pcap_count_pkts(handle, &max_frame_size); + plogx_info("%u packets in pcap file '%s'; max frame size=%d\n", task->orig_n_pkts, targ->pcap_file, max_frame_size); + PROX_PANIC(max_frame_size > task->max_frame_size, + max_frame_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE -4 ? + "pkt_size too high and jumbo frames disabled" : "pkt_size > mtu"); if (targ->n_pkts) - task->n_pkts = RTE_MIN(task->n_pkts, targ->n_pkts); - PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n"); + task->orig_n_pkts = RTE_MIN(task->orig_n_pkts, targ->n_pkts); + if (task->imix_nb_pkts == 0) { + task->n_pkts = task->orig_n_pkts; + } else { + task->n_pkts = task->imix_nb_pkts * task->orig_n_pkts; + } + task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, FROM_PCAP); plogx_info("Loading %u packets from pcap\n", task->n_pkts); - size_t mem_size = task->n_pkts * sizeof(*task->pkt_template); - task->pkt_template = prox_zmalloc(mem_size, socket_id); - task->pkt_template_orig = prox_zmalloc(mem_size, socket_id); - PROX_PANIC(task->pkt_template == NULL || - task->pkt_template_orig == NULL, - "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size); - - pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->pkt_template_orig, NULL); + + pcap_read_pkts(handle, targ->pcap_file, task->orig_n_pkts, task->pkt_template_orig, NULL, max_frame_size); pcap_close(handle); task_gen_reset_pkt_templates(task); + check_all_pkt_size(task, DO_PANIC); + check_all_fields_in_bounds(task, DO_PANIC); + task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes); } -static struct rte_mempool *task_gen_create_mempool(struct task_args *targ) +static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint16_t max_frame_size) { static char name[] = "gen_pool"; struct rte_mempool *ret; const int sock_id = rte_lcore_to_socket_id(targ->lconf->id); name[0]++; - ret = rte_mempool_create(name, targ->nb_mbuf - 1, MBUF_SIZE, + uint32_t mbuf_size = TX_MBUF_SIZE; + if (max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size) + mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM; + plog_info("\t\tCreating mempool with name '%s'\n", name); + ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0, sock_id, 0); PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n", sock_id, targ->nb_mbuf - 1); + + plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret, + targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id); + return ret; } @@ -1128,18 +1283,33 @@ int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size) struct task_gen *task = (struct task_gen *)tbase; int rc; - task->pkt_template[0].len = pkt_size; - if ((rc = check_all_pkt_size(task, 0)) != 0) - return rc; - check_fields_in_bounds(task); - return rc; + for (size_t i = 0; i < task->n_pkts; ++i) { + if ((rc = check_pkt_size(task, pkt_size, 0)) != 0) + return rc; + if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0) + return rc; + } + for (size_t i = 0; i < task->n_pkts; ++i) { + task->pkt_template[i].len = pkt_size; + } + return 0; } -void task_gen_set_gateway_ip(struct task_base *tbase, uint32_t ip) +int task_gen_set_imix(struct task_base *tbase, 
uint32_t nb_pkt_sizes, uint32_t *pkt_sizes) { struct task_gen *task = (struct task_gen *)tbase; - task->gw_ip = ip; - task->flags &= ~FLAG_DST_MAC_KNOWN; + int rc; + + memcpy(task->imix_pkt_sizes, pkt_sizes, nb_pkt_sizes * sizeof(uint32_t)); + for (size_t i = 0; i < nb_pkt_sizes; ++i) { + if ((rc = check_pkt_size(task, pkt_sizes[i], DO_NOT_PANIC)) != 0) + return rc; + if ((rc = check_fields_in_bounds(task, pkt_sizes[i], DO_NOT_PANIC)) != 0) + return rc; + } + // only set new_imix_nb_pkts if checks of pkt sizes succeeded + task->new_imix_nb_pkts = nb_pkt_sizes; + return 0; } void task_gen_set_rate(struct task_base *tbase, uint64_t bps) @@ -1159,13 +1329,14 @@ void task_gen_reset_randoms(struct task_base *tbase) task->rand[i].rand_offset = 0; } task->n_rands = 0; - task->flags &= ~FLAG_RANDOM_IPS; } int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len) { struct task_gen *task = (struct task_gen *)tbase; + if (offset + len > task->max_frame_size) + return -1; for (size_t i = 0; i < task->n_pkts; ++i) { uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8); uint8_t *dst = task->pkt_template[i].buf; @@ -1183,6 +1354,16 @@ void task_gen_reset_values(struct task_base *tbase) struct task_gen *task = (struct task_gen *)tbase; task_gen_reset_pkt_templates_content(task); + task_gen_pkt_template_recalc_metadata(task); + check_all_pkt_size(task, DO_NOT_PANIC); + check_all_fields_in_bounds(task, DO_NOT_PANIC); + task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes); + + if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) { + for (uint32_t i = 0; i < task->n_pkts; ++i) { + rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr)); + } + } } uint32_t task_gen_get_n_randoms(struct task_base *tbase) @@ -1195,40 +1376,44 @@ uint32_t task_gen_get_n_randoms(struct task_base *tbase) static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ) { struct task_gen_pcap *task = (struct task_gen_pcap *)tbase; - const uint32_t sockid = rte_lcore_to_socket_id(targ->lconf->id); + task->socket_id = rte_lcore_to_socket_id(targ->lconf->id); + uint32_t max_frame_size; task->loop = targ->loop; task->pkt_idx = 0; task->hz = rte_get_tsc_hz(); - task->local_mbuf.mempool = task_gen_create_mempool(targ); - - PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n"); - char err[PCAP_ERRBUF_SIZE]; pcap_t *handle = pcap_open_offline(targ->pcap_file, err); PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err); - task->n_pkts = pcap_count_pkts(handle); + task->n_pkts = pcap_count_pkts(handle, &max_frame_size); plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file); + task->local_mbuf.mempool = task_gen_create_mempool(targ, max_frame_size); + + PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n"); + if (targ->n_pkts) { plogx_info("Configured to load %u packets\n", targ->n_pkts); if (task->n_pkts > targ->n_pkts) task->n_pkts = targ->n_pkts; } - PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n"); - plogx_info("Loading %u packets from pcap\n", task->n_pkts); size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc)); - uint8_t *mem = prox_zmalloc(mem_size, sockid); + uint8_t *mem = prox_zmalloc(mem_size, task->socket_id); PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size); 
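+	/* Single allocation: the pkt_template array comes first, the
+	   per-packet tsc deltas follow right behind it. */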
task->proto = (struct pkt_template *) mem; task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto)); - pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc); + for (uint i = 0; i < targ->n_pkts; i++) { + task->proto[i].buf = prox_zmalloc(max_frame_size, task->socket_id); + PROX_PANIC(task->proto[i].buf == NULL, "Failed to allocate %u bytes (in huge pages) for pcap file\n", max_frame_size); + } + + pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc, max_frame_size); pcap_close(handle); } @@ -1276,22 +1461,45 @@ int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t of task->rand[task->n_rands].rand_mask = mask; task->rand[task->n_rands].fixed_bits = fixed; - struct pkt_template *pktpl = &task->pkt_template[0]; - if (!((offset >= pktpl->ip_dst_pos + 4) || (offset + len < pktpl->ip_dst_pos))) { - plog_info("\tUsing randoms IP destinations\n"); - task->flags |= FLAG_RANDOM_IPS; - } - task->n_rands++; return 0; } +static void start(struct task_base *tbase) +{ + struct task_gen *task = (struct task_gen *)tbase; + task->pkt_queue_index = 0; + + task_gen_reset_token_time(task); + if (tbase->l3.tmaster) { + register_all_ip_to_ctrl_plane(task); + } + + /* TODO + Handle the case when two tasks transmit to the same port + and one of them is stopped. In that case ARP (requests or replies) + might not be sent. Master will have to keep a list of rings. + stop will have to de-register IP from ctrl plane. + un-registration will remove the ring. when having more than + one active rings, master can always use the first one + */ +} + +static void start_pcap(struct task_base *tbase) +{ + struct task_gen_pcap *task = (struct task_gen_pcap *)tbase; + /* When we start, the first packet is sent immediately. */ + task->last_tsc = rte_rdtsc() - task->proto_tsc[0]; + task->pkt_idx = 0; +} + static void init_task_gen_early(struct task_args *targ) { uint8_t *generator_count = prox_sh_find_system("generator_count"); if (generator_count == NULL) { - generator_count = prox_zmalloc(sizeof(*generator_count), 0); + generator_count = prox_zmalloc(sizeof(*generator_count), rte_lcore_to_socket_id(targ->lconf->id)); + PROX_PANIC(generator_count == NULL, "Failed to allocate generator count\n"); prox_sh_add_system("generator_count", generator_count); } targ->generator_id = *generator_count; @@ -1301,10 +1509,21 @@ static void init_task_gen_early(struct task_args *targ) static void init_task_gen(struct task_base *tbase, struct task_args *targ) { struct task_gen *task = (struct task_gen *)tbase; + task->socket_id = rte_lcore_to_socket_id(targ->lconf->id); task->packet_id_pos = targ->packet_id_pos; - task->local_mbuf.mempool = task_gen_create_mempool(targ); + struct prox_port_cfg *port = find_reachable_port(targ); + // TODO: check that all reachable ports have the same mtu... + if (port) { + task->cksum_offload = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM); + task->port = port; + task->max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + 2 * PROX_VLAN_TAG_SIZE; + } else { + // Not generating to any port... 
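+		// fall back to the standard Ethernet maximum frame size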
+		task->max_frame_size = PROX_RTE_ETHER_MAX_LEN;
+	}
+	task->local_mbuf.mempool = task_gen_create_mempool(targ, task->max_frame_size);
 	PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
 	task->pkt_idx = 0;
 	task->hz = rte_get_tsc_hz();
@@ -1314,9 +1533,16 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 	task->sig = targ->sig;
 	task->new_rate_bps = targ->rate_bps;
 
+	/*
+	 * For tokens, use 10 Gbps as the base rate.
+	 * Scripts can then use the speed command, with speed=100 meaning 10 Gbps and speed=400 meaning 40 Gbps.
+	 * A script can query the prox "port info" command to find out the port link speed, and hence
+	 * the rate at which to start. Note that virtio running on OVS reports 10 Gbps, so a script
+	 * probably also has to check the driver (as returned by the same "port info" command).
+	 */
 	struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
-
 	token_time_init(&task->token_time, &tt_cfg);
+
 	init_task_gen_seeds(task);
 
 	task->min_bulk_size = targ->min_bulk_size;
@@ -1334,98 +1560,55 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
 	PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat_pos or accur_pos configured but latency not enabled\n");
 
 	task->generator_id = targ->generator_id;
-	task->link_speed = UINT64_MAX;
-	if (targ->nb_txrings == 0 && targ->nb_txports == 1)
-		task->link_speed = 1250000000;
+	plog_info("\t\tGenerator id = %d\n", task->generator_id);
+
+	// Allocate the array holding the bytes-to-TSC conversion for all supported frame sizes
+	task->bytes_to_tsc = prox_zmalloc(task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]), task->socket_id);
+	PROX_PANIC(task->bytes_to_tsc == NULL,
+		"Failed to allocate %lu bytes (in huge pages) for bytes_to_tsc\n", task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]));
+
+	// task->port->max_link_speed reports the maximum, non-negotiated link speed in Mbps, e.g. 40k for a 40 Gbps NIC.
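+	// (Mbps * 125000 converts megabits/s to bytes/s, e.g. 40000 Mbps * 125000 = 5e9 bytes/s.)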
+	// It can be UINT32_MAX (virtual devices, or not supported by DPDK < 16.04).
+	uint64_t bytes_per_hz = UINT64_MAX;
+	if ((task->port) && (task->port->max_link_speed != UINT32_MAX)) {
+		bytes_per_hz = task->port->max_link_speed * 125000L;
+		plog_info("\t\tPort %u: max link speed is %lu Mbps\n",
+			(uint8_t)(task->port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
+	}
+	// The hz estimate might be slightly over-estimated, which would result in too much
+	// extrapolation. Only account for 99% of the extrapolation to handle clocks with up
	// to 1% of error.
+	for (unsigned int i = 0; i < task->max_frame_size * MAX_PKT_BURST; i++) {
+		if (bytes_per_hz == UINT64_MAX)
+			task->bytes_to_tsc[i] = 0;
+		else
+			task->bytes_to_tsc[i] = (task->hz * i * 0.99) / bytes_per_hz;
+	}
+
+	task->imix_nb_pkts = targ->imix_nb_pkts;
+	for (uint32_t i = 0; i < targ->imix_nb_pkts; i++) {
+		task->imix_pkt_sizes[i] = targ->imix_pkt_sizes[i];
+	}
 	if (!strcmp(targ->pcap_file, "")) {
-		plog_info("\tUsing inline definition of a packet\n");
+		plog_info("\t\tUsing inline definition of a packet\n");
 		task_init_gen_load_pkt_inline(task, targ);
 	} else {
-		plog_info("Loading from pcap %s\n", targ->pcap_file);
+		plog_info("\t\tLoading from pcap %s\n", targ->pcap_file);
 		task_init_gen_load_pcap(task, targ);
 	}
 
-	if ((targ->flags & DSF_KEEP_SRC_MAC) == 0 && (targ->nb_txrings || targ->nb_txports)) {
-		uint8_t *src_addr = prox_port_cfg[tbase->tx_params_hw.tx_port_queue->port].eth_addr.addr_bytes;
-		for (uint32_t i = 0; i < task->n_pkts; ++i) {
-			rte_memcpy(&task->pkt_template[i].buf[6], src_addr, 6);
-		}
-	}
-	memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(struct ether_addr));
-	if (!strcmp(targ->task_init->sub_mode_str, "l3")) {
-		// In L3 GEN, we need to receive ARP replies
-		task->flags = FLAG_L3_GEN;
-		task->gw_ip = rte_cpu_to_be_32(targ->gateway_ipv4);
-		uint32_t n_entries;
-
-		if (targ->number_gen_ip == 0)
-			n_entries = 1048576;
-		else
-			n_entries = targ->number_gen_ip;
-
-		static char hash_name[30];
-		sprintf(hash_name, "A%03d_mac_table", targ->lconf->id);
-
-		struct rte_hash_parameters hash_params = {
-			.name = hash_name,
-			.entries = n_entries,
-			.key_len = sizeof(uint32_t),
-			.hash_func = rte_hash_crc,
-			.hash_func_init_val = 0,
-		};
-		task->mac_hash = rte_hash_create(&hash_params);
-		PROX_PANIC(task->mac_hash == NULL, "Failed to set up mac hash table for %d IP\n", n_entries);
-
-		const uint32_t socket = rte_lcore_to_socket_id(targ->lconf->id);
-		task->dst_mac = (uint64_t *)prox_zmalloc(n_entries * sizeof(uint64_t), socket);
-		PROX_PANIC(task->dst_mac == NULL, "Failed to allocate mac table for %d IP\n", n_entries);
-
+	PROX_PANIC(((targ->nb_txrings == 0) && (targ->nb_txports == 0)), "Gen mode requires a tx ring or a tx port");
+	if ((targ->flags & DSF_KEEP_SRC_MAC) == 0) {
+		task->flags |= TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC;
+		memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(prox_rte_ether_addr));
 		for (uint32_t i = 0; i < task->n_pkts; ++i) {
-			// For all destination IP, ARP request will need to be sent
-			// Store position of Destination IP in template
-			int ip_dst_pos = 0;
-			int maybe_ipv4 = 0;
-			int l2_len = sizeof(struct ether_hdr);
-			struct vlan_hdr *vlan_hdr;
-			uint8_t *pkt = task->pkt_template[i].buf;
-			struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
-			struct ipv4_hdr *ip;
-			uint16_t ether_type = eth_hdr->ether_type;
-
-			// Unstack VLAN tags
-			while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(struct vlan_hdr) < task->pkt_template[i].len)) {
-				vlan_hdr = (struct vlan_hdr *)(pkt + l2_len);
-				l2_len +=4;
-				ether_type = vlan_hdr->eth_proto;
-			}
-			if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
-				l2_len +=4;
-				maybe_ipv4 = 1;
-			}
-			if ((ether_type == ETYPE_IPv4) || maybe_ipv4) {
-				struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + l2_len);
-				PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
-				// Even if IPv4 header contains options, options are after ip src and dst
-				ip_dst_pos = l2_len + sizeof(struct ipv4_hdr) - sizeof(uint32_t);
-				uint32_t *p = ((uint32_t *)(task->pkt_template[i].buf + ip_dst_pos - sizeof(uint32_t)));
-				task->pkt_template[i].ip_dst_pos = ip_dst_pos;
-				task->pkt_template[i].ip_src = *p;
-				uint32_t *p1 = ((uint32_t *)(task->pkt_template[i].buf + ip_dst_pos));
-				plog_info("\tip_dst_pos = %d, ip_dst = %x\n", ip_dst_pos, *p1);
-			}
+			rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
 		}
-		task->src_ip = rte_cpu_to_be_32(targ->local_ipv4);
 	}
 	for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
 		PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
 			   "Failed to add random\n");
 	}
-
-	struct prox_port_cfg *port = find_reachable_port(targ);
-	if (port) {
-		task->cksum_offload = port->capabilities.tx_offload_cksum;
-	}
 }
 
 static struct task_init task_init_gen = {
@@ -1433,10 +1616,11 @@ static struct task_init task_init_gen = {
 	.init = init_task_gen,
 	.handle = handle_gen_bulk,
 	.start = start,
+	.early_init = init_task_gen_early,
 #ifdef SOFT_CRC
 // For SOFT_CRC, no offload is needed. If both the NOOFFLOADS and NOMULTSEGS flags are set,
 // the vector mode is used by DPDK, resulting (theoretically) in higher performance.
-	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
 	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
@@ -1449,24 +1633,27 @@ static struct task_init task_init_gen_l3 = {
 	.init = init_task_gen,
 	.handle = handle_gen_bulk,
 	.start = start,
+	.early_init = init_task_gen_early,
 #ifdef SOFT_CRC
 // For SOFT_CRC, no offload is needed. If both the NOOFFLOADS and NOMULTSEGS flags are set,
 // the vector mode is used by DPDK, resulting (theoretically) in higher performance.
-	.flag_features = TASK_FEATURE_ZERO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_ZERO_RX,
+	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
-	.flag_features = TASK_FEATURE_ZERO_RX,
+	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
 	.size = sizeof(struct task_gen)
 };
 
+/* This mode uses the time stamps in the pcap file */
 static struct task_init task_init_gen_pcap = {
 	.mode_str = "gen",
 	.sub_mode_str = "pcap",
 	.init = init_task_gen_pcap,
 	.handle = handle_gen_pcap_bulk,
 	.start = start_pcap,
+	.early_init = init_task_gen_early,
 #ifdef SOFT_CRC
-	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
#else
 	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
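 	// A sketch of the pcap replay timing (an assumption; the handler itself is not
 	// shown here): proto_tsc[i] holds packet i's pcap timestamp converted to TSC
 	// cycles, and since start_pcap() sets last_tsc = rte_rdtsc() - proto_tsc[0],
 	// packet 0 qualifies immediately while packet i is released once
 	// rte_rdtsc() - last_tsc >= proto_tsc[i].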