2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
20 #include <rte_cycles.h>
21 #include <rte_version.h>
22 #include <rte_byteorder.h>
23 #include <rte_ether.h>
24 #include <rte_hash_crc.h>
25 #include <rte_malloc.h>
27 #include "prox_shared.h"
29 #include "prox_malloc.h"
30 #include "handle_gen.h"
31 #include "handle_lat.h"
32 #include "task_init.h"
33 #include "task_base.h"
34 #include "prox_port_cfg.h"
39 #include "mbuf_utils.h"
41 #include "prox_cksum.h"
43 #include "prox_assert.h"
45 #include "token_time.h"
46 #include "local_mbuf.h"
49 #include "handle_master.h"
51 #include "prox_ipv6.h"
60 #define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
63 #define DO_NOT_PANIC 0
66 #define NOT_FROM_PCAP 0
70 #define TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC 1
/* Initialize an mbuf from a packet template: set both pkt_len and data_len
 * to the template length, then copy the template bytes into the mbuf data
 * area (caller passes 'pkt' = mtod pointer of 'mbuf'). */
72 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
74 const uint32_t pkt_size = pkt_template->len;
76 rte_pktmbuf_pkt_len(mbuf) = pkt_size;
77 rte_pktmbuf_data_len(mbuf) = pkt_size;
79 rte_memcpy(pkt, pkt_template->buf, pkt_template->len);
/* Per-task state for the pcap-replay generator mode. */
82 struct task_gen_pcap {
83 struct task_base base;
85 struct local_mbuf local_mbuf;
87 struct pkt_template *proto;
/* NOTE(review): the fields below belong to struct task_gen — the struct's
 * opening line is not visible in this excerpt. */
96 struct task_base base;
98 struct token_time token_time;
99 struct local_mbuf local_mbuf;
100 struct pkt_template *pkt_template; /* packet templates used at runtime */
101 uint64_t write_duration_estimate; /* how long it took previously to write the time stamps in the packets */
102 uint64_t earliest_tsc_next_pkt;
103 uint64_t new_rate_bps;
104 uint64_t pkt_queue_index;
105 uint32_t n_pkts; /* number of packets in pcap */
106 uint32_t orig_n_pkts; /* number of packets in pcap */
107 uint32_t pkt_idx; /* current packet from pcap */
108 uint32_t pkt_count; /* how many pakets to generate */
109 uint32_t max_frame_size;
110 uint32_t runtime_flags;
112 uint16_t packet_id_pos;
117 uint8_t generator_id;
118 uint8_t n_rands; /* number of randoms */
119 uint8_t n_ranges; /* number of ranges */
120 uint8_t min_bulk_size;
121 uint8_t max_bulk_size;
123 uint8_t runtime_checksum_needed;
/* Per-random-field parameters (applied in task_gen_apply_random_fields). */
126 uint32_t rand_mask; /* since the random vals are uniform, masks don't introduce bias */
127 uint32_t fixed_bits; /* length of each random (max len = 4) */
128 uint16_t rand_offset; /* each random has an offset*/
129 uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
131 struct range ranges[MAX_RANGES];
132 uint64_t accur[ACCURACY_WINDOW];
133 uint64_t pkt_tsc_offset[64];
134 struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
135 prox_rte_ether_addr src_mac;
137 uint8_t cksum_offload;
138 struct prox_port_cfg *port;
139 uint64_t *bytes_to_tsc; /* lookup table: wire bytes -> tsc duration */
140 uint32_t imix_pkt_sizes[MAX_IMIX_PKTS];
141 uint32_t imix_nb_pkts;
142 uint32_t new_imix_nb_pkts;
143 } __rte_cache_aligned;
/* Forward declarations for helpers defined later in this file. */
145 static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes);
146 static void task_gen_reset_pkt_templates_content(struct task_gen *task);
147 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task);
148 static int check_all_pkt_size(struct task_gen *task, int do_panic);
149 static int check_all_fields_in_bounds(struct task_gen *task, int do_panic);
/* Return the IPv4 header length in bytes, derived from the IHL nibble.
 * Warns (and presumably bails — warn branch body not fully visible here)
 * when the version nibble is not 4. */
151 static inline uint8_t ipv4_get_hdr_len(prox_rte_ipv4_hdr *ip)
153 /* Optimize for common case of IPv4 header without options. */
154 if (ip->version_ihl == 0x45)
155 return sizeof(prox_rte_ipv4_hdr);
156 if (unlikely(ip->version_ihl >> 4 != 4)) {
157 plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
/* IHL is in units of 4 bytes. */
160 return (ip->version_ihl & 0xF) * 4;
/* Compute L2 and L3 header lengths of a raw packet buffer.
 * Walks any stacked VLAN tags (802.1ad / 802.1Q) to find the real
 * ether_type, then dispatches on it; for IPv4 the L3 length comes from
 * the IHL field. 'len' bounds the VLAN walk. */
163 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
165 *l2_len = sizeof(prox_rte_ether_hdr);
167 prox_rte_vlan_hdr *vlan_hdr;
168 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
169 prox_rte_ipv4_hdr *ip;
170 uint16_t ether_type = eth_hdr->ether_type;
/* Skip QinQ / VLAN tags, accumulating their size into *l2_len. */
173 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(prox_rte_vlan_hdr) < len)) {
174 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + *l2_len);
176 ether_type = vlan_hdr->eth_proto;
179 // No L3 cksum offload for IPv6, but TODO L4 offload
180 // ETYPE_EoGRE CRC not implemented yet
182 switch (ether_type) {
196 plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
/* For IPv4, derive the L3 header length from the IHL nibble. */
201 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len);
202 if (ip->version_ihl >> 4 == 4)
203 *l3_len = ipv4_get_hdr_len(ip);
/* (Re)compute L3/L4 checksums for one packet using the template's cached
 * l2/l3 lengths. IPv4 goes through prox_ip_udp_cksum (which can use HW
 * offload); IPv6 UDP/TCP checksums are always computed in software via
 * rte_ipv6_udptcp_cksum. */
207 static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_template *pkt_template, int cksum_offload)
209 uint16_t l2_len = pkt_template->l2_len;
210 uint16_t l3_len = pkt_template->l3_len;
212 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len);
214 prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
215 } else if (ip->version_ihl >> 4 == 6) {
216 prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(hdr + l2_len);
217 if (ip6->proto == IPPROTO_UDP) {
/* Checksum field must be zero while computing the new checksum. */
218 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
219 udp->dgram_cksum = 0;
220 udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
221 } else if (ip6->proto == IPPROTO_TCP) {
222 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
224 tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
/* Re-arm the rate-limiting token bucket with the currently requested rate
 * (new_rate_bps) and restart it from "now". */
229 static void task_gen_reset_token_time(struct task_gen *task)
231 token_time_set_bpp(&task->token_time, task->new_rate_bps);
232 token_time_reset(&task->token_time, rte_rdtsc(), 0);
/* Decrement the remaining packet budget by the bulk just sent.
 * pkt_count == (uint32_t)-1 means "unlimited" and is left untouched.
 * NOTE(review): the branch for pkt_count < send_bulk is not visible in
 * this excerpt — presumably clamps to 0; confirm against full source. */
235 static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk)
237 if (task->pkt_count == (uint32_t)-1)
240 if (task->pkt_count >= send_bulk)
241 task->pkt_count -= send_bulk;
/* Main handler for pcap-replay mode: determine how many recorded packets
 * are due (based on their recorded inter-packet timestamps vs. now, up to
 * 64), take that many mbufs from the local pool, fill each from its
 * template, and transmit. Wraps pkt_idx when looping the capture. */
247 static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf, uint16_t n_pkts)
249 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
250 uint64_t now = rte_rdtsc();
251 uint64_t send_bulk = 0;
252 uint32_t pkt_idx_tmp = task->pkt_idx;
254 if (pkt_idx_tmp == task->n_pkts) {
255 PROX_ASSERT(task->loop);
/* Count packets whose scheduled send time has already passed. */
259 for (uint16_t j = 0; j < 64; ++j) {
260 uint64_t tsc = task->proto_tsc[pkt_idx_tmp];
261 if (task->last_tsc + tsc <= now) {
262 task->last_tsc += tsc;
265 if (pkt_idx_tmp == task->n_pkts) {
276 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
277 if (new_pkts == NULL)
/* Fill each mbuf from the corresponding pcap template. */
280 for (uint16_t j = 0; j < send_bulk; ++j) {
281 struct rte_mbuf *next_pkt = new_pkts[j];
282 struct pkt_template *pkt_template = &task->proto[task->pkt_idx];
283 uint8_t *hdr = rte_pktmbuf_mtod(next_pkt, uint8_t *);
285 pkt_template_init_mbuf(pkt_template, next_pkt, hdr);
288 if (task->pkt_idx == task->n_pkts) {
296 return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
/* Convert a wire-byte count to a tsc duration via the precomputed
 * per-task lookup table. */
299 static inline uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
301 return task->bytes_to_tsc[bytes];
/* Next template index, wrapping to 0 after the last template. */
304 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
306 return pkt_idx + 1 >= task->n_pkts? 0 : pkt_idx + 1;
/* Template index at a (possibly negative, passed as unsigned) offset from
 * the current pkt_idx, modulo the number of templates. */
309 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
311 return (task->pkt_idx + offset) % task->n_pkts;
/* Decide how many packets to send in this iteration, constrained by
 * max_bulk_size, the remaining pkt_count budget, and the token-bucket
 * byte allowance; returns 0 if fewer than min_bulk would fit. Also
 * reports the total wire bytes of the chosen bulk via *total_bytes. */
314 static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *total_bytes)
316 /* The biggest bulk we allow to send is task->max_bulk_size
317 packets. The max bulk size can also be limited by the
318 pkt_count field. At the same time, we are rate limiting
319 based on the specified speed (in bytes per second) so token
320 bucket based rate limiting must also be applied. The
321 minimum bulk size is also constrained. If the calculated
322 bulk size is less then the minimum, then don't send
325 const uint32_t min_bulk = task->min_bulk_size;
326 uint32_t max_bulk = task->max_bulk_size;
/* Cap the bulk by the remaining packet budget (unless unlimited). */
328 if (task->pkt_count != (uint32_t)-1 && task->pkt_count < max_bulk) {
329 max_bulk = task->pkt_count;
332 uint32_t send_bulk = 0;
333 uint32_t pkt_idx_tmp = task->pkt_idx;
334 uint32_t would_send_bytes = 0;
338 * TODO - this must be improved to take into account the fact that, after applying randoms
339 * The packet can be replaced by an ARP
/* Walk the upcoming templates until the token bucket runs dry. */
341 for (uint16_t j = 0; j < max_bulk; ++j) {
342 struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp];
343 pkt_size = pktpl->len;
344 uint32_t pkt_len = pkt_len_to_wire_size(pkt_size);
345 if (pkt_len + would_send_bytes > task->token_time.bytes_now)
348 pkt_idx_tmp = task_gen_next_pkt_idx(task, pkt_idx_tmp);
351 would_send_bytes += pkt_len;
354 if (send_bulk < min_bulk)
356 *total_bytes = would_send_bytes;
/* Apply all configured random fields to one packet: draw a uniform 32-bit
 * value, mask it and OR in the fixed bits, byte-swap to big-endian, then
 * copy the low-order rand_len bytes into the packet at rand_offset. */
360 static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr)
362 uint32_t ret, ret_tmp;
364 for (uint16_t i = 0; i < task->n_rands; ++i) {
365 ret = random_next(&task->rand[i].state);
366 ret_tmp = (ret & task->rand[i].rand_mask) | task->rand[i].fixed_bits;
368 ret_tmp = rte_bswap32(ret_tmp);
369 /* At this point, the lower order bytes (BE) contain
370 the generated value. The address where the values
371 of interest starts is at ret_tmp + 4 - rand_len. */
372 uint8_t *pret_tmp = (uint8_t*)&ret_tmp;
373 rte_memcpy(hdr + task->rand[i].rand_offset, pret_tmp + 4 - task->rand[i].rand_len, task->rand[i].rand_len);
/* Apply the random fields to every packet header in the bulk. */
377 static void task_gen_apply_all_random_fields(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
382 for (uint16_t i = 0; i < count; ++i)
383 task_gen_apply_random_fields(task, pkt_hdr[i]);
/* Apply the configured range fields to every packet: each range value is
 * incremented per packet (wrapping from max back to min), byte-swapped to
 * big-endian, and its low-order range_len bytes written at the range's
 * offset in the packet. */
386 static void task_gen_apply_all_ranges(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
392 for (uint16_t i = 0; i < count; ++i) {
393 for (uint16_t j = 0; j < task->n_ranges; ++j) {
394 if (unlikely(task->ranges[j].value == task->ranges[j].max))
395 task->ranges[j].value = task->ranges[j].min;
397 task->ranges[j].value++;
398 ret = rte_bswap32(task->ranges[j].value);
399 uint8_t *pret = (uint8_t*)&ret;
400 rte_memcpy(pkt_hdr[i] + task->ranges[j].offset, pret + 4 - task->ranges[j].range_len, task->ranges[j].range_len);
/* Write the 32-bit accuracy value into one packet at accur_pos. */
405 static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
407 *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
/* Stamp the generator signature into a packet template at sig_pos. */
410 static void task_gen_apply_sig(struct task_gen *task, struct pkt_template *dst)
413 *(uint32_t *)(dst->buf + task->sig_pos) = task->sig;
/* Write per-packet accuracy values (measured ACCURACY_WINDOW packets ago)
 * into the bulk. No-op when accuracy stamping is disabled (accur_pos==0). */
416 static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
418 if (!task->accur_pos)
421 /* The accuracy of task->pkt_queue_index - ACCURACY_WINDOW is stored in
422 packet task->pkt_queue_index. The ID modulo ACCURACY_WINDOW is the
424 for (uint16_t j = 0; j < count; ++j) {
425 uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & (ACCURACY_WINDOW - 1)];
426 task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
/* Copy a unique packet id into one packet at packet_id_pos. */
430 static void task_gen_apply_unique_id(struct task_gen *task, uint8_t *pkt_hdr, const struct unique_id *id)
432 struct unique_id *dst = (struct unique_id *)(pkt_hdr + task->packet_id_pos);
/* Stamp a monotonically increasing unique id (generator_id + queue index)
 * into every packet of the bulk. No-op when packet_id_pos is 0. */
437 static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
439 if (!task->packet_id_pos)
442 for (uint16_t i = 0; i < count; ++i) {
444 unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
445 task_gen_apply_unique_id(task, pkt_hdr[i], &id);
/* Recompute checksums for the bulk, but only when the TX CRC flag is set
 * and a runtime recomputation is actually needed (fields were modified).
 * pkt_idx is walked back by 'count' to pair each sent packet with the
 * template it was built from. */
449 static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
451 if (!(task->runtime_flags & TASK_TX_CRC))
454 if (!task->runtime_checksum_needed)
457 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
458 for (uint16_t i = 0; i < count; ++i) {
459 struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
460 checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
461 pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
/* Deduct the sent bytes from the token bucket. If a full max-size burst
 * was sent and tokens are still accumulating faster than we can send,
 * clamp the bucket so we don't build up an unbounded backlog. */
465 static void task_gen_consume_tokens(struct task_gen *task, uint32_t tokens, uint32_t send_count)
467 /* If max burst has been sent, we can't keep up so just assume
468 that we can (leaving a "gap" in the packet stream on the
470 task->token_time.bytes_now -= tokens;
471 if (send_count == task->max_bulk_size && task->token_time.bytes_now > tokens) {
472 task->token_time.bytes_now = tokens;
/* Estimate the tsc duration of the bulk just built: the offset of the
 * last packet plus (unless NO_EXTRAPOLATION) the transmit time of that
 * last packet itself. */
476 static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t count)
478 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
479 struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
480 uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
481 #ifdef NO_EXTRAPOLATION
482 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1];
484 uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
485 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
488 return bulk_duration;
/* Write the estimated transmit timestamp into each packet of the bulk
 * (at lat_pos), keeping written times monotonically increasing across
 * bulks via delta_t, then busy-wait until the estimated tx time has
 * arrived. Returns the tsc just before transmission (0 when latency
 * stamping is disabled). */
491 static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
493 if (!task->lat_enabled)
496 uint64_t tx_tsc, delta_t;
497 uint64_t tsc_before_tx = 0;
499 /* Just before sending the packets, apply the time stamp
500 relative to when the first packet will be sent. The first
501 packet will be sent now. The time is read for each packet
502 to reduce the error towards the actual time the packet will
504 uint64_t write_tsc_after, write_tsc_before;
506 write_tsc_before = rte_rdtsc();
508 /* The time it took previously to write the time stamps in the
509 packets is used as an estimate for how long it will take to
510 write the time stamps now. The estimated time at which the
511 packets will actually be sent will be at tx_tsc. */
512 tx_tsc = write_tsc_before + task->write_duration_estimate;
514 /* The offset delta_t tracks the difference between the actual
515 time and the time written in the packets. Adding the offset
516 to the actual time insures that the time written in the
517 packets is monotonically increasing. At the same time,
518 simply sleeping until delta_t is zero would leave a period
519 of silence on the line. The error has been introduced
520 earlier, but the packets have already been sent. */
522 /* This happens typically if previous bulk was delayed
523 by an interrupt e.g. (with Time in nsec)
524 Time x: sleep 4 microsec
525 Time x+4000: send 64 packets (64 packets as 4000 nsec, w/ 10Gbps 64 bytes)
526 Time x+5000: send 16 packets (16 packets as 1000 nsec)
527 When we send the 16 packets, the 64 ealier packets are not yet
529 if (tx_tsc < task->earliest_tsc_next_pkt)
530 delta_t = task->earliest_tsc_next_pkt - tx_tsc;
/* Stamp each packet with its scheduled time (truncated by
   LATENCY_ACCURACY to fit in 32 bits). */
534 for (uint16_t i = 0; i < count; ++i) {
535 uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
536 const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
537 *pos = pkt_tsc >> LATENCY_ACCURACY;
540 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
541 task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
542 write_tsc_after = rte_rdtsc();
543 task->write_duration_estimate = write_tsc_after - write_tsc_before;
545 /* Make sure that the time stamps that were written
546 are valid. The offset must be taken into account */
548 tsc_before_tx = rte_rdtsc();
549 } while (tsc_before_tx < tx_tsc);
551 return tsc_before_tx;
/* Record how late the actual transmission was versus the stamped time
 * (rdtsc - tsc_before_tx) for each packet of the bulk, into the circular
 * accuracy window; consumed ACCURACY_WINDOW packets later by
 * task_gen_apply_all_accur_pos. No-op when accur_pos is 0. */
554 static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint64_t tsc_before_tx)
556 if (!task->accur_pos)
559 uint64_t accur = rte_rdtsc() - tsc_before_tx;
560 uint64_t first_accuracy_idx = task->pkt_queue_index - count;
562 for (uint32_t i = 0; i < count; ++i) {
563 uint32_t accuracy_idx = (first_accuracy_idx + i) & (ACCURACY_WINDOW - 1);
565 task->accur[accuracy_idx] = accur;
/* Prefetch mbuf structs, resolve their data pointers into pkt_hdr[], and
 * prefetch the packet data itself — three separate passes to keep the
 * prefetches ahead of the dependent loads. */
569 static void task_gen_load_and_prefetch(struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
571 for (uint16_t i = 0; i < count; ++i)
572 rte_prefetch0(mbufs[i]);
573 for (uint16_t i = 0; i < count; ++i)
574 pkt_hdr[i] = rte_pktmbuf_mtod(mbufs[i], uint8_t *);
575 for (uint16_t i = 0; i < count; ++i)
576 rte_prefetch0(pkt_hdr[i]);
/* Fill each mbuf of the bulk from the current template, advance pkt_idx,
 * and (when latency stamping is enabled) record each packet's tsc offset
 * within the bulk based on the wire bytes queued before it. */
579 static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
581 uint64_t will_send_bytes = 0;
583 for (uint16_t i = 0; i < count; ++i) {
/* NOTE(review): 'pktpl' duplicates 'pkt_template' and is unused in the
 * visible lines — candidate for removal once the full body is checked. */
584 struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
585 struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
586 pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
587 prox_rte_ether_hdr *hdr = (prox_rte_ether_hdr *)pkt_hdr[i];
588 if (task->lat_enabled) {
589 #ifdef NO_EXTRAPOLATION
590 task->pkt_tsc_offset[i] = 0;
592 task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
594 will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
596 task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
/* Allocate the runtime and original template arrays plus one
 * max_frame_size buffer per template, all on the task's NUMA socket.
 * On failure either panics or logs, depending on do_panic. */
600 static int task_gen_allocate_templates(struct task_gen *task, uint32_t orig_nb_pkts, uint32_t nb_pkts, int do_panic, int pcap)
602 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
603 size_t orig_mem_size = orig_nb_pkts * sizeof(*task->pkt_template);
604 task->pkt_template = prox_zmalloc(mem_size, task->socket_id);
605 task->pkt_template_orig = prox_zmalloc(orig_mem_size, task->socket_id);
607 if (task->pkt_template == NULL || task->pkt_template_orig == NULL) {
608 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for %s\n", mem_size, pcap ? "pcap file":"packet template");
/* One payload buffer per original template... */
612 for (size_t i = 0; i < orig_nb_pkts; i++) {
613 task->pkt_template_orig[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
614 if (task->pkt_template_orig[i].buf == NULL) {
615 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
/* ...and per runtime template. */
619 for (size_t i = 0; i < nb_pkts; i++) {
620 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
621 if (task->pkt_template[i].buf == NULL) {
622 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
/* Resize the runtime template array when the IMIX configuration changes:
 * free payload buffers beyond the new count, reallocate the template
 * array (preserving the overlapping prefix), then allocate payload
 * buffers for any newly added templates. */
629 static int task_gen_reallocate_templates(struct task_gen *task, uint32_t nb_pkts, int do_panic)
631 // Need to free up bufs allocated in previous (longer) imix
632 for (size_t i = nb_pkts; i < task->n_pkts; i++) {
633 if (task->pkt_template[i].buf) {
634 rte_free(task->pkt_template[i].buf);
635 task->pkt_template[i].buf = NULL;
639 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
640 size_t old_mem_size = task->n_pkts * sizeof(*task->pkt_template);
641 if (old_mem_size > mem_size)
642 old_mem_size = mem_size;
644 struct pkt_template *ptr;
646 // re-allocate memory for new pkt_template (this might allocate additional memory or free up some...)
647 if ((ptr = rte_malloc_socket(NULL, mem_size, RTE_CACHE_LINE_SIZE, task->socket_id)) != NULL) {
648 memcpy(ptr, task->pkt_template, old_mem_size);
649 rte_free(task->pkt_template);
650 task->pkt_template = ptr;
652 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for packet template for IMIX\n", mem_size);
656 // Need to allocate bufs for new template but no need to reallocate for existing ones
657 for (size_t i = task->n_pkts; i < nb_pkts; ++i) {
658 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
659 if (task->pkt_template[i].buf == NULL) {
660 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for packet %zd in IMIX\n", task->max_frame_size, i);
/* Validate a template packet size against [eth+ipv4 header .. max frame].
 * With do_panic set, violations abort via PROX_PANIC; otherwise they are
 * logged and reported as an error return. */
667 static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
669 const uint16_t min_len = sizeof(prox_rte_ether_hdr) + sizeof(prox_rte_ipv4_hdr);
670 const uint16_t max_len = task->max_frame_size;
673 PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
674 PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
675 PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
679 plog_err("Invalid packet size length (no packet defined?)\n");
682 if (pkt_size > max_len) {
/* Distinguish "jumbo frames disabled" from a plain MTU violation. */
683 if (pkt_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE - 4)
684 plog_err("pkt_size too high and jumbo frames disabled\n");
686 plog_err("pkt_size out of range (must be <= (mtu=%u))\n", max_len);
689 if (pkt_size < min_len) {
690 plog_err("pkt_size out of range (must be >= %u)\n", min_len);
/* Verify that every enabled write position (latency stamp, packet id,
 * accuracy) fits inside a packet of pkt_size bytes; panics or logs per
 * do_panic. Latency and accuracy are 4-byte fields (pos..pos+3), the
 * packet id spans pos..pos+4 as written. */
697 static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
699 if (task->lat_enabled) {
700 uint32_t pos_beg = task->lat_pos;
701 uint32_t pos_end = task->lat_pos + 3U;
704 PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
705 pos_beg, pos_end, pkt_size);
706 else if (pkt_size <= pos_end) {
707 plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
711 if (task->packet_id_pos) {
712 uint32_t pos_beg = task->packet_id_pos;
713 uint32_t pos_end = task->packet_id_pos + 4U;
716 PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
717 pos_beg, pos_end, pkt_size);
718 else if (pkt_size <= pos_end) {
719 plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
723 if (task->accur_pos) {
724 uint32_t pos_beg = task->accur_pos;
725 uint32_t pos_end = task->accur_pos + 3U;
728 PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u-%u, but packet size is %u bytes\n",
729 pos_beg, pos_end, pkt_size);
730 else if (pkt_size <= pos_end) {
731 plog_err("Writing accuracy at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
/* After an IMIX resize, fix up each template's IP total_length, IP header
 * checksum, and UDP/TCP length+checksum to match its new packet size.
 * Template k = j * n_orig_pkts + i pairs original template i with IMIX
 * size j. Templates with l2_len == 0 (non-IP) are skipped. */
738 static int task_gen_set_eth_ip_udp_sizes(struct task_gen *task, uint32_t n_orig_pkts, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
742 prox_rte_ipv4_hdr *ip;
743 struct pkt_template *template;
745 for (size_t j = 0; j < nb_pkt_sizes; ++j) {
746 for (size_t i = 0; i < n_orig_pkts; ++i) {
747 k = j * n_orig_pkts + i;
748 template = &task->pkt_template[k];
749 if (template->l2_len == 0)
751 ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
752 ip->total_length = rte_bswap16(pkt_sizes[j] - template->l2_len);
753 l4_len = pkt_sizes[j] - template->l2_len - template->l3_len;
754 ip->hdr_checksum = 0;
755 prox_ip_cksum_sw(ip);
757 if (ip->next_proto_id == IPPROTO_UDP) {
758 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
759 udp->dgram_len = rte_bswap16(l4_len);
760 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
761 } else if (ip->next_proto_id == IPPROTO_TCP) {
762 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
763 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
/* Switch to the newly requested IMIX configuration: reallocate templates
 * if the total count changed, regenerate template lengths/content/metadata,
 * re-validate sizes and field positions, and fix up the IP/UDP headers. */
770 static int task_gen_apply_imix(struct task_gen *task, int do_panic)
772 struct pkt_template *ptr;
774 task->imix_nb_pkts = task->new_imix_nb_pkts;
775 uint32_t n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
777 if ((n_pkts != task->n_pkts) && ((rc = task_gen_reallocate_templates(task, n_pkts, do_panic)) < 0))
780 task->n_pkts = n_pkts;
/* pkt_idx may now be out of range after shrinking the template set. */
781 if (task->pkt_idx >= n_pkts)
783 task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
784 task_gen_reset_pkt_templates_content(task);
785 task_gen_pkt_template_recalc_metadata(task);
786 check_all_pkt_size(task, DO_NOT_PANIC);
787 check_all_fields_in_bounds(task, DO_NOT_PANIC);
788 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Apply pending runtime configuration changes (rate and IMIX), called
 * from the fast path at the start of each handle iteration. */
792 static void task_gen_update_config(struct task_gen *task)
794 if (task->token_time.cfg.bpp != task->new_rate_bps)
795 task_gen_reset_token_time(task);
796 if (task->new_imix_nb_pkts)
797 task_gen_apply_imix(task, DO_NOT_PANIC);
798 task->new_imix_nb_pkts = 0;
/* Recursively enumerate every IPv4 value reachable through 'mask' (each
 * masked bit tried as 0 and 1) and register each resulting address
 * (OR'ed with fixed_bits) with the master/control-plane task. */
801 static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
803 struct task_base *tbase = (struct task_base *)task;
805 build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits);
807 build_value(task, mask >> 1, bit_pos + 1, val | (1 << bit_pos), fixed_bits);
810 register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
/* IPv6 counterpart of build_value: enumerate all addresses reachable
 * through a (max 32-bit) random mask starting at init_var_bit_pos, OR in
 * the fixed bytes, and register each node with the control plane. */
814 static inline void build_value_ipv6(struct task_gen *task, uint32_t mask, int var_bit_pos, int init_var_bit_pos, struct ipv6_addr val, struct ipv6_addr fixed_bits)
816 struct task_base *tbase = (struct task_base *)task;
817 if (var_bit_pos < 32) {
818 build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
/* Set the current variable bit and recurse with it = 1 as well. */
820 int byte_pos = (var_bit_pos + init_var_bit_pos) / 8;
821 int bit_pos = (var_bit_pos + init_var_bit_pos) % 8;
822 val.bytes[byte_pos] = val.bytes[byte_pos] | (1 << bit_pos);
823 build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
/* NOTE(review): loop bound is sizeof(struct ipv6_addr) / 8 — only the
 * first bytes are OR'ed with fixed_bits; confirm against full source
 * whether /8 is intended or should cover all 16 bytes. */
826 for (uint i = 0; i < sizeof(struct ipv6_addr) / 8; i++)
827 val.bytes[i] = val.bytes[i] | fixed_bits.bytes[i];
828 register_node_to_ctrl_plane(tbase->l3.tmaster, &null_addr, &val, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
/* For every packet template, locate the source IP (IPv4 or IPv6, after
 * any VLAN/MPLS encapsulation), register it with the control plane, and
 * — when a configured random field overlaps the source IP — enumerate
 * and register every IP the randoms can generate (build_value /
 * build_value_ipv6). Panics on malformed templates or on an L3/NDP mode
 * mismatch. */
832 static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
834 struct task_base *tbase = (struct task_base *)task;
837 uint32_t mask, ip_len;
838 struct ipv6_addr *ip6_src = NULL;
841 for (uint32_t i = 0; i < task->n_pkts; ++i) {
842 struct pkt_template *pktpl = &task->pkt_template[i];
843 unsigned int ip_src_pos = 0;
845 unsigned int l2_len = sizeof(prox_rte_ether_hdr);
847 uint8_t *pkt = pktpl->buf;
848 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
849 uint16_t ether_type = eth_hdr->ether_type;
850 prox_rte_vlan_hdr *vlan_hdr;
851 prox_rte_ipv4_hdr *ip;
/* Walk stacked VLAN tags to the payload ether_type. */
854 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) {
855 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + l2_len);
857 ether_type = vlan_hdr->eth_proto;
/* MPLS: the IP version nibble disambiguates v4 vs v6 payloads. */
859 if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
861 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
862 if (ip->version_ihl >> 4 == 4)
864 else if (ip->version_ihl >> 4 != 6) // Version field at same location for IPv4 and IPv6
866 } else if (ether_type == ETYPE_IPv4) {
867 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
868 PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4); // Invalid Packet
870 } else if (ether_type == ETYPE_IPv6) {
871 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
872 PROX_PANIC(ip->version_ihl >> 4 != 6, "IPv6 ether_type but IP version = %d != 6", ip->version_ihl >> 4); // Invalid Packet
/* The template's address family must match the configured mode. */
877 PROX_PANIC(ipv4 && ((prox_cfg.flags & DSF_L3_ENABLED) == 0), "Trying to generate an IPv4 packet in NDP mode => not supported\n");
878 PROX_PANIC((ipv4 == 0) && ((prox_cfg.flags & DSF_NDP_ENABLED) == 0), "Trying to generate an IPv6 packet in L3 (IPv4) mode => not supported\n");
880 // Even if IPv4 header contains options, options are after ip src and dst
881 ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t);
882 ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
883 plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
884 register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
885 ip_len = sizeof(uint32_t);
/* IPv6 source address sits just before the destination address. */
887 ip_src_pos = l2_len + sizeof(prox_rte_ipv6_hdr) - 2 * sizeof(struct ipv6_addr);
888 ip6_src = ((struct ipv6_addr *)(pktpl->buf + ip_src_pos));
889 plog_info("\tip_src_pos = %d, ip6_src = "IPv6_BYTES_FMT"\n", ip_src_pos, IPv6_BYTES(ip6_src->bytes));
890 register_node_to_ctrl_plane(tbase->l3.tmaster, ip6_src, &null_addr, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
891 ip_len = sizeof(struct ipv6_addr);
/* Check each random field for overlap with the source IP. */
894 for (int j = 0; j < task->n_rands; j++) {
895 offset = task->rand[j].rand_offset;
896 len = task->rand[j].rand_len;
897 mask = task->rand[j].rand_mask;
898 fixed = task->rand[j].fixed_bits;
899 plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
900 if (offset >= ip_src_pos + ip_len) // First random bit after IP
902 if (offset + len < ip_src_pos) // Last random bit before IP
/* IPv4: merge the random mask with the fixed IP bytes and enumerate. */
906 if (offset >= ip_src_pos) {
907 int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1;
908 mask = mask & ip_src_mask;
909 fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask);
910 build_value(task, mask, 0, 0, fixed);
912 int32_t bits = ((ip_src_pos + 4 - offset - len) * 8);
914 fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
915 build_value(task, mask, 0, 0, fixed);
918 // We do not support when random partially covers IP - either starting before or finishing after
919 if (offset + len >= ip_src_pos + ip_len) { // len over the ip
920 plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
923 if (offset < ip_src_pos) {
924 plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
927 // Even for IPv6 the random mask supported by PROX are 32 bits only
928 struct ipv6_addr fixed_ipv6;
929 uint init_var_byte_pos = (offset - ip_src_pos);
930 for (uint i = 0; i < sizeof(struct ipv6_addr); i++) {
931 if (i < init_var_byte_pos)
932 fixed_ipv6.bytes[i] = ip6_src->bytes[i];
933 else if (i < init_var_byte_pos + len)
934 fixed_ipv6.bytes[i] = (fixed >> (i - init_var_byte_pos)) & 0xFF;
936 fixed_ipv6.bytes[i] = ip6_src->bytes[i];
938 build_value_ipv6(task, mask, 0, init_var_byte_pos * 8, null_addr, fixed_ipv6);
/* Main fast-path handler for the generator: apply pending config, update
 * the token bucket, size the bulk, take mbufs, build and stamp packets
 * (randoms, ranges, accuracy, ids, latency, checksums), transmit, and
 * roll back bookkeeping for any packets the TX path dropped. */
944 static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
946 struct task_gen *task = (struct task_gen *)tbase;
947 uint8_t out[MAX_PKT_BURST] = {0};
952 task_gen_update_config(task);
954 if (task->pkt_count == 0) {
955 task_gen_reset_token_time(task);
/* Rate of 0 bytes-per-period means the generator is paused. */
958 if (!task->token_time.cfg.bpp)
961 token_time_update(&task->token_time, rte_rdtsc());
963 uint32_t would_send_bytes;
964 uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
968 task_gen_take_count(task, send_bulk);
969 task_gen_consume_tokens(task, would_send_bytes, send_bulk);
971 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
972 if (new_pkts == NULL)
974 uint8_t *pkt_hdr[MAX_RING_BURST];
976 task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
977 task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
978 task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
979 task_gen_apply_all_ranges(task, pkt_hdr, send_bulk);
980 task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
981 task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
983 uint64_t tsc_before_tx;
985 tsc_before_tx = task_gen_write_latency(task, pkt_hdr, send_bulk);
986 task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
987 ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
988 task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
990 // If we failed to send some packets, we need to do some clean-up:
993 // We need re-use the packets indexes not being sent
994 // Hence non-sent packets will not be considered as lost by the receiver when it looks at
995 // packet ids. This should also increase the percentage of packets used for latency measurements
996 task->pkt_queue_index -= ret;
998 // In case of failures, the estimate about when we can send next packet (earliest_tsc_next_pkt) is wrong
999 // This would result in under-estimated latency (up to 0 or negative)
1000 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, ret);
1001 task->earliest_tsc_next_pkt -= bulk_duration;
/* Seed the PRNG state of every random-field slot. */
1006 static void init_task_gen_seeds(struct task_gen *task)
1008 for (size_t i = 0; i < sizeof(task->rand)/sizeof(task->rand[0]); ++i)
1009 random_init_seed(&task->rand[i].state);
/* Count the packets in an open pcap and record the largest frame length
 * in *max_frame_size; rewinds the pcap file position to where it started
 * so the packets can be re-read afterwards. */
1012 static uint32_t pcap_count_pkts(pcap_t *handle, uint32_t *max_frame_size)
1014 struct pcap_pkthdr header;
1017 *max_frame_size = 0;
1018 long pkt1_fpos = ftell(pcap_file(handle));
1020 while ((buf = pcap_next(handle, &header))) {
1021 if (header.len > *max_frame_size)
1022 *max_frame_size = header.len;
1025 int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
1026 PROX_PANIC(ret2 != 0, "Failed to reset reading pcap file\n");
/* Arithmetic mean of n 64-bit samples, rounded to nearest integer
 * (n/2 is added before the division). Caller must ensure n > 0. */
static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
{
	uint64_t sum = 0;
	uint32_t idx = n;

	while (idx--)
		sum += time_stamp[idx];
	return (sum + n / 2) / n;
}
/* Load n_pkts packets from an open pcap into the template array.
 * At most max_frame_size bytes are copied per packet; longer frames
 * are truncated with a warning. When time_stamp is non-NULL it is
 * first filled with per-packet TSC values, then converted in place to
 * inter-packet gaps; the gap before packet 0 (not present in the
 * capture) is set to the average of the other gaps since playback
 * loops. NOTE(review): local declarations and the initialization of
 * 'beg' appear elided in this extract — confirm against the full file. */
static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp, uint32_t max_frame_size)
	struct pcap_pkthdr header;
	for (uint32_t i = 0; i < n_pkts; ++i) {
		buf = pcap_next(handle, &header);
		PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
		proto[i].len = header.len;
		/* Clamp the copy to the template buffer size. */
		len = RTE_MIN(header.len, max_frame_size);
		if (header.len > len)
			plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
		/* 'beg' serves as the reference time for all later packets. */
		static struct timeval beg;
		tv = tv_diff(&beg, &header.ts);
		tv_to_tsc(&tv, time_stamp + i);
		rte_memcpy(proto[i].buf, buf, len);
	if (time_stamp && n_pkts) {
		/* Turn absolute TSC values into deltas, back to front. */
		for (uint32_t i = n_pkts - 1; i > 0; --i)
			time_stamp[i] -= time_stamp[i - 1];
		/* Since the handle function will loop the packets,
		there is one time-stamp that is not provided by the
		pcap file. This is the time between the last and
		the first packet. This implementation takes the
		average of the inter-packet times here. */
		time_stamp[0] = avg_time_stamp(time_stamp + 1, n_pkts - 1);
/* Run check_pkt_size() on every template length; stop at the first
 * failure. With do_panic set the callee panics instead of returning.
 * NOTE(review): the 'rc' declaration and return statements appear
 * elided in this extract. */
static int check_all_pkt_size(struct task_gen *task, int do_panic)
	for (uint32_t i = 0; i < task->n_pkts;++i) {
		if ((rc = check_pkt_size(task, task->pkt_template[i].len, do_panic)) != 0)
/* Verify that all configured field offsets (lat/accur/random/...) fit
 * inside every template's length; stop at the first failure.
 * NOTE(review): the 'rc' declaration and return statements appear
 * elided in this extract. */
static int check_all_fields_in_bounds(struct task_gen *task, int do_panic)
	for (uint32_t i = 0; i < task->n_pkts;++i) {
		if ((rc = check_fields_in_bounds(task, task->pkt_template[i].len, do_panic)) != 0)
1102 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
1104 struct pkt_template *template;
1106 for (size_t i = 0; i < task->n_pkts; ++i) {
1107 template = &task->pkt_template[i];
1108 parse_l2_l3_len(template->buf, &template->l2_len, &template->l3_len, template->len);
/* Precompute checksums inside every packet template and decide whether
 * checksums must still be recomputed per packet at runtime.
 * IPv4: the header checksum plus software UDP/TCP checksums are
 * written here; IPv6: UDP/TCP checksums via rte_ipv6_udptcp_cksum().
 * runtime_checksum_needed is set when latency / accuracy / packet-id
 * fields will later be written past the L2 header, invalidating the
 * precomputed values. NOTE(review): 'continue' after the l2_len test
 * and the TCP cksum zeroing line appear elided in this extract. */
static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
	struct pkt_template *template;
	prox_rte_ipv4_hdr *ip;
	task->runtime_checksum_needed = 0;
	for (size_t i = 0; i < task->n_pkts; ++i) {
		template = &task->pkt_template[i];
		/* Templates without parsed L2 framing are skipped. */
		if (template->l2_len == 0)
		ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
		if (ip->version_ihl >> 4 == 4) {
			/* IPv4: zero, then recompute the header checksum in software. */
			ip->hdr_checksum = 0;
			prox_ip_cksum_sw(ip);
			uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
			if (ip->next_proto_id == IPPROTO_UDP) {
				prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
				prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
			} else if (ip->next_proto_id == IPPROTO_TCP) {
				prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
				prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
		} else if (ip->version_ihl >> 4 == 6) {
			/* IPv6: L4 header directly follows the fixed IPv6 header. */
			prox_rte_ipv6_hdr *ip6;
			ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len);
			if (ip6->proto == IPPROTO_UDP) {
				prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
				udp->dgram_cksum = 0;
				udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
			} else if (ip6->proto == IPPROTO_TCP) {
				prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
				tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
	/* The current implementation avoids checksum
	calculation by determining that at packet
	construction time, no fields are applied that would
	require a recalculation of the checksum. */
	if (task->lat_enabled && task->lat_pos > template->l2_len)
		task->runtime_checksum_needed = 1;
	if (task->accur_pos > template->l2_len)
		task->runtime_checksum_needed = 1;
	if (task->packet_id_pos > template->l2_len)
		task->runtime_checksum_needed = 1;
/* Refresh all derived template state: header lengths first, then the
 * checksums (which depend on those lengths). */
static void task_gen_pkt_template_recalc_all(struct task_gen *task)
	task_gen_pkt_template_recalc_metadata(task);
	task_gen_pkt_template_recalc_checksum(task);
1167 static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes)
1169 struct pkt_template *src, *dst;
1171 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1172 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1173 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1174 dst->len = pkt_sizes[j];
1179 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
1181 struct pkt_template *src, *dst;
1183 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1184 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1185 src = &task->pkt_template_orig[i];
1186 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1187 dst->len = src->len;
/* Re-copy the original template payloads into every runtime template.
 * RTE_MAX(src->len, dst->len) bytes are copied so a runtime template
 * that was grown (e.g. by IMIX) is fully refreshed — assumes both
 * buffers are allocated at max frame size (TODO confirm in
 * task_gen_allocate_templates). Re-applies the port source MAC when
 * configured to overwrite it, then re-stamps the generator signature. */
static void task_gen_reset_pkt_templates_content(struct task_gen *task)
	struct pkt_template *src, *dst;
	for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
		for (size_t i = 0; i < task->orig_n_pkts; ++i) {
			src = &task->pkt_template_orig[i];
			dst = &task->pkt_template[j * task->orig_n_pkts + i];
			memcpy(dst->buf, src->buf, RTE_MAX(src->len, dst->len));
			/* The source MAC sits right after the destination MAC
			   in the Ethernet header, hence the ether_addr offset. */
			if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
				rte_memcpy(&dst->buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
			task_gen_apply_sig(task, dst);
/* Rebuild the runtime templates: choose lengths (IMIX sizes when
 * configured, otherwise the original lengths — NOTE(review): the
 * 'else' line appears elided in this extract), restore content and
 * recompute all derived fields. */
static void task_gen_reset_pkt_templates(struct task_gen *task)
	if (task->imix_nb_pkts)
		task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
	task_gen_reset_pkt_templates_len(task);
	task_gen_reset_pkt_templates_content(task);
	task_gen_pkt_template_recalc_all(task);
/* Build the packet templates from the inline packet definition in the
 * config ('pkt inline' + optional IMIX sizes). A single original
 * template is used; n_pkts becomes imix_nb_pkts when IMIX is set.
 * NOTE(review): the else-branches of the imix/n_pkts selection appear
 * elided in this extract. */
static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
	task->orig_n_pkts = 1;
	if (task->imix_nb_pkts == 0) {
		/* No IMIX: the configured pkt_size is the single size. */
		task->imix_pkt_sizes[0] = targ->pkt_size;
	task->n_pkts = task->imix_nb_pkts;
	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, NOT_FROM_PCAP);
	/* Copy the inline bytes as the one original template. */
	rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, task->max_frame_size);
	task->pkt_template_orig[0].len = task->imix_pkt_sizes[0];
	task_gen_reset_pkt_templates(task);
	check_all_pkt_size(task, DO_PANIC);
	check_all_fields_in_bounds(task, DO_PANIC);
	// If IMIX was not specified then pkt_size is specified using pkt_size parameter or the length of pkt_inline
	// In that case, for backward compatibility, we do NOT adapt the length of IP and UDP to the length of the packet
	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Build the packet templates from a pcap file: count packets to size
 * the arrays, enforce the MTU-derived max frame size, then read the
 * capture content into the original templates (time stamps unused in
 * this mode). NOTE(review): the else-branch of the imix selection and
 * the closing of the pcap handle appear elided in this extract. */
static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
	char err[PCAP_ERRBUF_SIZE];
	uint32_t max_frame_size;
	pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
	PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
	task->orig_n_pkts = pcap_count_pkts(handle, &max_frame_size);
	plogx_info("%u packets in pcap file '%s'; max frame size=%d\n", task->orig_n_pkts, targ->pcap_file, max_frame_size);
	/* Distinguish "frame too big for jumbo-less config" from "frame > mtu". */
	PROX_PANIC(max_frame_size > task->max_frame_size,
	max_frame_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE -4 ?
	"pkt_size too high and jumbo frames disabled" : "pkt_size > mtu");
	/* Never load more originals than the configured n_pkts. */
	task->orig_n_pkts = RTE_MIN(task->orig_n_pkts, targ->n_pkts);
	if (task->imix_nb_pkts == 0) {
		task->n_pkts = task->orig_n_pkts;
	task->n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, FROM_PCAP);
	plogx_info("Loading %u packets from pcap\n", task->n_pkts);
	/* NULL time_stamp: inter-packet gaps are not used in gen mode. */
	pcap_read_pkts(handle, targ->pcap_file, task->orig_n_pkts, task->pkt_template_orig, NULL, max_frame_size);
	task_gen_reset_pkt_templates(task);
	check_all_pkt_size(task, DO_PANIC);
	check_all_fields_in_bounds(task, DO_PANIC);
	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Create the mbuf pool used by the generator, on the lcore's socket.
 * The mbuf size is grown beyond TX_MBUF_SIZE when max_frame_size
 * requires it. Panics on allocation failure.
 * NOTE(review): 'name' is a fixed string — creating a second pool with
 * the same name may fail in DPDK; confirm how multiple gen tasks are
 * handled. The trailing rte_mempool_create() arguments and the return
 * statement appear elided in this extract. */
static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint16_t max_frame_size)
	static char name[] = "gen_pool";
	struct rte_mempool *ret;
	const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
	uint32_t mbuf_size = TX_MBUF_SIZE;
	/* Each mbuf must hold the frame plus metadata and headroom. */
	if (max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size)
		mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
	plog_info("\t\tCreating mempool with name '%s'\n", name);
	ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size,
	targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
	rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
	PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
	sock_id, targ->nb_mbuf - 1);
	plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret,
	targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id);
1298 void task_gen_set_pkt_count(struct task_base *tbase, uint32_t count)
1300 struct task_gen *task = (struct task_gen *)tbase;
1302 task->pkt_count = count;
/* Runtime control: apply a single uniform packet size to all
 * templates. All sizes are validated first with non-panicking checks
 * and templates are only modified when every check passed.
 * NOTE(review): the 'rc' declaration and the return statements appear
 * elided in this extract. */
int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
	struct task_gen *task = (struct task_gen *)tbase;
	/* Validate before mutating anything. */
	for (size_t i = 0; i < task->n_pkts; ++i) {
		if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
		if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
	for (size_t i = 0; i < task->n_pkts; ++i) {
		task->pkt_template[i].len = pkt_size;
/* Runtime control: install a new IMIX size list. Sizes are copied and
 * validated first; new_imix_nb_pkts is only published once all checks
 * pass so a failed update leaves the running IMIX untouched.
 * NOTE(review): the 'rc' declaration and the return statements appear
 * elided in this extract. */
int task_gen_set_imix(struct task_base *tbase, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
	struct task_gen *task = (struct task_gen *)tbase;
	memcpy(task->imix_pkt_sizes, pkt_sizes, nb_pkt_sizes * sizeof(uint32_t));
	for (size_t i = 0; i < nb_pkt_sizes; ++i) {
		if ((rc = check_pkt_size(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
		if ((rc = check_fields_in_bounds(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
	// only set new_imix_nb_pkts if checks of pkt sizes succeeded
	task->new_imix_nb_pkts = nb_pkt_sizes;
1339 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
1341 struct task_gen *task = (struct task_gen *)tbase;
1343 task->new_rate_bps = bps;
/* Runtime control: clear every configured random field (mask, fixed
 * bits and offset). NOTE(review): the line resetting n_rands itself
 * appears elided in this extract. */
void task_gen_reset_randoms(struct task_base *tbase)
	struct task_gen *task = (struct task_gen *)tbase;
	for (uint32_t i = 0; i < task->n_rands; ++i) {
		task->rand[i].rand_mask = 0;
		task->rand[i].fixed_bits = 0;
		task->rand[i].rand_offset = 0;
/* Runtime control: write 'len' bytes of 'value' at 'offset' into every
 * packet template, then recompute derived metadata and checksums.
 * Rejected when the write would go past the max frame size.
 * NOTE(review): the error/success return statements appear elided in
 * this extract. */
int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
	struct task_gen *task = (struct task_gen *)tbase;
	if (offset + len > task->max_frame_size)
	for (size_t i = 0; i < task->n_pkts; ++i) {
		/* On little-endian hosts this yields the low 'len' bytes of
		   'value' in network byte order — TODO confirm big-endian
		   behavior is not relied upon. */
		uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
		uint8_t *dst = task->pkt_template[i].buf;
		rte_memcpy(dst + offset, &to_write, len);
	task_gen_pkt_template_recalc_all(task);
1376 void task_gen_reset_values(struct task_base *tbase)
1378 struct task_gen *task = (struct task_gen *)tbase;
1380 task_gen_reset_pkt_templates_content(task);
1381 task_gen_pkt_template_recalc_metadata(task);
1382 check_all_pkt_size(task, DO_NOT_PANIC);
1383 check_all_fields_in_bounds(task, DO_NOT_PANIC);
1384 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
1386 if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
1387 for (uint32_t i = 0; i < task->n_pkts; ++i) {
1388 rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
1393 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
1395 struct task_gen *task = (struct task_gen *)tbase;
1397 return task->n_rands;
/* Init for the pcap replay sub-mode: count packets, create the mbuf
 * pool, allocate the template + per-packet-TSC arrays in one chunk and
 * load the capture (time stamps included for pacing).
 * NOTE(review): the allocation loop below runs to targ->n_pkts while
 * task->proto is sized by task->n_pkts, which was clamped to
 * min(pcap count, targ->n_pkts) — if the capture holds fewer packets
 * than targ->n_pkts this looks like an out-of-bounds write; confirm
 * and bound the loop by task->n_pkts.
 * NOTE(review): several lines (e.g. closing the handle, returns)
 * appear elided in this extract. */
static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
	struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
	task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
	uint32_t max_frame_size;
	task->loop = targ->loop;
	task->hz = rte_get_tsc_hz();
	char err[PCAP_ERRBUF_SIZE];
	pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
	PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
	task->n_pkts = pcap_count_pkts(handle, &max_frame_size);
	plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
	task->local_mbuf.mempool = task_gen_create_mempool(targ, max_frame_size);
	PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
	plogx_info("Configured to load %u packets\n", targ->n_pkts);
	/* Never load more packets than the configured n_pkts. */
	if (task->n_pkts > targ->n_pkts)
		task->n_pkts = targ->n_pkts;
	plogx_info("Loading %u packets from pcap\n", task->n_pkts);
	/* One chunk: template array followed by the TSC array. */
	size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
	uint8_t *mem = prox_zmalloc(mem_size, task->socket_id);
	PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
	task->proto = (struct pkt_template *) mem;
	task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
	for (uint i = 0; i < targ->n_pkts; i++) {
		task->proto[i].buf = prox_zmalloc(max_frame_size, task->socket_id);
		PROX_PANIC(task->proto[i].buf == NULL, "Failed to allocate %u bytes (in huge pages) for pcap file\n", max_frame_size);
	pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc, max_frame_size);
/* Find the random-field slot configured at the given packet offset.
 * NOTE(review): the 'return i' / not-found return (presumably
 * UINT32_MAX, given the caller's test) appear elided in this extract. */
static int task_gen_find_random_with_offset(struct task_gen *task, uint32_t offset)
	for (uint32_t i = 0; i < task->n_rands; ++i) {
		if (task->rand[i].rand_offset == offset) {
/* Register a new value range (min/max/offset) with the generator;
 * fails when MAX_RANGES are already configured. range_len is derived
 * from 'max' — NOTE(review): the loop lines computing it (presumably a
 * shift loop counting the bits of m) appear elided in this extract. */
static int task_gen_add_range(struct task_base *tbase, struct range *range)
	struct task_gen *task = (struct task_gen *)tbase;
	if (task->n_ranges == MAX_RANGES) {
		plog_err("Too many ranges\n");
	/* Start the running value at the range minimum. */
	task->ranges[task->n_ranges].min = range->min;
	task->ranges[task->n_ranges].value = range->min;
	uint32_t m = range->max;
	task->ranges[task->n_ranges].range_len = 0;
	task->ranges[task->n_ranges].range_len++;
	task->ranges[task->n_ranges].offset = range->offset;
	task->ranges[task->n_ranges++].max = range->max;
/* Register (or overwrite) a random field parsed from rand_str at the
 * given packet offset. A random at an already-used offset replaces the
 * existing entry instead of consuming a new slot. Randomized bytes
 * invalidate precomputed checksums, so runtime checksumming is forced
 * on. NOTE(review): the error returns, the n_rands increment and the
 * success return appear elided in this extract. */
int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t offset, uint32_t rand_id)
	struct task_gen *task = (struct task_gen *)tbase;
	uint32_t existing_rand;
	if (rand_id == UINT32_MAX && task->n_rands == 64) {
		plog_err("Too many randoms\n");
	uint32_t mask, fixed, len;
	if (parse_random_str(&mask, &fixed, &len, rand_str)) {
		plog_err("%s\n", get_parse_err());
	/* Random bytes make precomputed checksums stale. */
	task->runtime_checksum_needed = 1;
	existing_rand = task_gen_find_random_with_offset(task, offset);
	if (existing_rand != UINT32_MAX) {
		plog_warn("Random at offset %d already set => overwriting len = %d %s\n", offset, len, rand_str);
		rand_id = existing_rand;
		task->rand[rand_id].rand_len = len;
		task->rand[rand_id].rand_offset = offset;
		task->rand[rand_id].rand_mask = mask;
		task->rand[rand_id].fixed_bits = fixed;
	/* New slot at the end of the array. */
	task->rand[task->n_rands].rand_len = len;
	task->rand[task->n_rands].rand_offset = offset;
	task->rand[task->n_rands].rand_mask = mask;
	task->rand[task->n_rands].fixed_bits = fixed;
/* Per-run start hook: reset the packet-id sequence and the token
 * bucket; when an L3 master task is configured, register this
 * generator's IPs with the control plane. */
static void start(struct task_base *tbase)
	struct task_gen *task = (struct task_gen *)tbase;
	task->pkt_queue_index = 0;
	task_gen_reset_token_time(task);
	if (tbase->l3.tmaster) {
		register_all_ip_to_ctrl_plane(task);
	Handle the case when two tasks transmit to the same port
	and one of them is stopped. In that case ARP (requests or replies)
	might not be sent. Master will have to keep a list of rings.
	stop will have to de-register IP from ctrl plane.
	un-registration will remove the ring. when having more than
	one active rings, master can always use the first one
1532 static void start_pcap(struct task_base *tbase)
1534 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1535 /* When we start, the first packet is sent immediately. */
1536 task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
1540 static void init_task_gen_early(struct task_args *targ)
1542 uint8_t *generator_count = prox_sh_find_system("generator_count");
1544 if (generator_count == NULL) {
1545 generator_count = prox_zmalloc(sizeof(*generator_count), rte_lcore_to_socket_id(targ->lconf->id));
1546 PROX_PANIC(generator_count == NULL, "Failed to allocate generator count\n");
1547 prox_sh_add_system("generator_count", generator_count);
1549 targ->generator_id = *generator_count;
1550 (*generator_count)++;
/* Main init for the "gen" mode: sizes and creates the mbuf pool, sets
 * up the token bucket used for rate limiting, precomputes the
 * bytes-to-TSC table used for latency extrapolation, loads the packet
 * templates (inline or pcap) and installs the configured randoms and
 * ranges. NOTE(review): several conditional/else lines appear elided
 * in this extract (port if/else, pcap/inline else, #ifdef guards). */
static void init_task_gen(struct task_base *tbase, struct task_args *targ)
	struct task_gen *task = (struct task_gen *)tbase;
	task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
	task->packet_id_pos = targ->packet_id_pos;
	struct prox_port_cfg *port = find_reachable_port(targ);
	// TODO: check that all reachable ports have the same mtu...
	task->cksum_offload = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
	/* Frame budget: MTU plus Ethernet header and two VLAN tags. */
	task->max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + 2 * PROX_VLAN_TAG_SIZE;
	// Not generating to any port...
	task->max_frame_size = PROX_RTE_ETHER_MAX_LEN;
	task->local_mbuf.mempool = task_gen_create_mempool(targ, task->max_frame_size);
	PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
	task->hz = rte_get_tsc_hz();
	task->lat_pos = targ->lat_pos;
	task->accur_pos = targ->accur_pos;
	task->sig_pos = targ->sig_pos;
	task->sig = targ->sig;
	task->new_rate_bps = targ->rate_bps;
	/*
	 * For tokens, use 10 Gbps as base rate
	 * Scripts can then use speed command, with speed=100 as 10 Gbps and speed=400 as 40 Gbps
	 * Script can query prox "port info" command to find out the port link speed to know
	 * at which rate to start. Note that virtio running on OVS returns 10 Gbps, so a script has
	 * probably also to check the driver (as returned by the same "port info" command.
	 */
	struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
	token_time_init(&task->token_time, &tt_cfg);
	init_task_gen_seeds(task);
	/* Clamp the configured bulk sizes to sane bounds. */
	task->min_bulk_size = targ->min_bulk_size;
	task->max_bulk_size = targ->max_bulk_size;
	if (task->min_bulk_size < 1)
		task->min_bulk_size = 1;
	if (task->max_bulk_size < 1)
		task->max_bulk_size = 64;
	PROX_PANIC(task->max_bulk_size > 64, "max_bulk_size higher than 64\n");
	PROX_PANIC(task->max_bulk_size < task->min_bulk_size, "max_bulk_size must be > than min_bulk_size\n");
	/* -1 == unlimited packet count. */
	task->pkt_count = -1;
	task->lat_enabled = targ->lat_enabled;
	task->runtime_flags = targ->runtime_flags;
	PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat not enabled by lat pos or accur pos configured\n");
	task->generator_id = targ->generator_id;
	plog_info("\t\tGenerator id = %d\n", task->generator_id);
	// Allocate array holding bytes to tsc for supported frame sizes
	/* NOTE(review): the panic message only reports max_frame_size while
	   the allocation is max_frame_size * MAX_PKT_BURST * elem size. */
	task->bytes_to_tsc = prox_zmalloc(task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]), task->socket_id);
	PROX_PANIC(task->bytes_to_tsc == NULL,
	"Failed to allocate %u bytes (in huge pages) for bytes_to_tsc\n", task->max_frame_size);
	// task->port->max_link_speed reports the maximum, non negotiated ink speed in Mbps e.g. 40k for a 40 Gbps NIC.
	// It can be UINT32_MAX (virtual devices or not supported by DPDK < 16.04)
	uint64_t bytes_per_hz = UINT64_MAX;
	if ((task->port) && (task->port->max_link_speed != UINT32_MAX)) {
		bytes_per_hz = task->port->max_link_speed * 125000L;
		plog_info("\t\tPort %u: max link speed is %ld Mbps\n",
		(uint8_t)(task->port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
	// There are cases where hz estimate might be slighly over-estimated
	// This results in too much extrapolation
	// Only account for 99% of extrapolation to handle cases with up to 1% error clocks
	for (unsigned int i = 0; i < task->max_frame_size * MAX_PKT_BURST ; i++) {
		if (bytes_per_hz == UINT64_MAX)
			task->bytes_to_tsc[i] = 0;
		task->bytes_to_tsc[i] = (task->hz * i * 0.99) / bytes_per_hz;
	/* Copy the IMIX configuration before building templates. */
	task->imix_nb_pkts = targ->imix_nb_pkts;
	for (uint32_t i = 0; i < targ->imix_nb_pkts; i++) {
		task->imix_pkt_sizes[i] = targ->imix_pkt_sizes[i];
	if (!strcmp(targ->pcap_file, "")) {
		plog_info("\t\tUsing inline definition of a packet\n");
		task_init_gen_load_pkt_inline(task, targ);
	plog_info("\t\tLoading from pcap %s\n", targ->pcap_file);
	task_init_gen_load_pcap(task, targ);
	PROX_PANIC(((targ->nb_txrings == 0) && (targ->nb_txports == 0)), "Gen mode requires a tx ring or a tx port");
	if ((targ->flags & DSF_KEEP_SRC_MAC) == 0) {
		/* Overwrite every template's source MAC with the tx port MAC. */
		task->flags |= TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC;
		memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(prox_rte_ether_addr));
		for (uint32_t i = 0; i < task->n_pkts; ++i) {
			rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
	for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
		PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
		"Failed to add random\n");
	for (uint32_t i = 0; i < targ->n_ranges; ++i) {
		PROX_PANIC(task_gen_add_range(tbase, &targ->range[i]), "Failed to add range\n");
/* Default "gen" mode registration. NOTE(review): the duplicated
 * .flag_features initializers are presumably selected by preprocessor
 * conditionals elided in this extract — confirm against the full file. */
static struct task_init task_init_gen = {
	.init = init_task_gen,
	.handle = handle_gen_bulk,
	.early_init = init_task_gen_early,
	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen)
/* "gen l3" sub-mode: same init/handler as plain gen; the l3 sub-mode
 * string enables control-plane (ARP/IP) handling elsewhere.
 * NOTE(review): the duplicated .flag_features initializers are
 * presumably selected by preprocessor conditionals elided here. */
static struct task_init task_init_gen_l3 = {
	.sub_mode_str = "l3",
	.init = init_task_gen,
	.handle = handle_gen_bulk,
	.early_init = init_task_gen_early,
	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen)
/* This mode uses time stamps in the pcap file */
/* "gen pcap" sub-mode: replays a capture with its original timing via
 * handle_gen_pcap_bulk / start_pcap. NOTE(review): the duplicated
 * .flag_features initializers are presumably selected by preprocessor
 * conditionals elided here. */
static struct task_init task_init_gen_pcap = {
	.sub_mode_str = "pcap",
	.init = init_task_gen_pcap,
	.handle = handle_gen_pcap_bulk,
	.start = start_pcap,
	.early_init = init_task_gen_early,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen_pcap)
/* Register all generator task modes with PROX at program start-up
 * (runs before main via the constructor attribute). */
__attribute__((constructor)) static void reg_task_gen(void)
	reg_task(&task_init_gen);
	reg_task(&task_init_gen_l3);
	reg_task(&task_init_gen_pcap);