2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
20 #include <rte_cycles.h>
21 #include <rte_version.h>
22 #include <rte_byteorder.h>
23 #include <rte_ether.h>
24 #include <rte_hash_crc.h>
25 #include <rte_malloc.h>
27 #include "prox_shared.h"
29 #include "prox_malloc.h"
30 #include "handle_gen.h"
31 #include "handle_lat.h"
32 #include "task_init.h"
33 #include "task_base.h"
34 #include "prox_port_cfg.h"
39 #include "mbuf_utils.h"
41 #include "prox_cksum.h"
43 #include "prox_assert.h"
45 #include "token_time.h"
46 #include "local_mbuf.h"
49 #include "handle_master.h"
51 #include "prox_ipv6.h"
60 #define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
63 #define DO_NOT_PANIC 0
66 #define NOT_FROM_PCAP 0
68 #define TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC 1
/* Initialize an mbuf from a packet template: set both the packet and data
   length to the template length and copy the template bytes into the mbuf
   data area pointed to by pkt.
   NOTE(review): brace/blank lines appear lost in extraction here. */
70 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
72 const uint32_t pkt_size = pkt_template->len;
74 rte_pktmbuf_pkt_len(mbuf) = pkt_size;
75 rte_pktmbuf_data_len(mbuf) = pkt_size;
77 rte_memcpy(pkt, pkt_template->buf, pkt_template->len);
/* Per-task state for the pcap replay generator (task_gen_pcap) followed by
   the state for the regular packet generator (task_gen).
   NOTE(review): several field lines and the "struct task_gen {" header line
   appear to be missing from this view — the boundary between the two
   structs is not visible. */
80 struct task_gen_pcap {
81 struct task_base base;
83 struct local_mbuf local_mbuf;
85 struct pkt_template *proto;
/* --- fields below belong to struct task_gen (header line not visible) --- */
94 struct task_base base;
96 struct token_time token_time;
97 struct local_mbuf local_mbuf;
98 struct pkt_template *pkt_template; /* packet templates used at runtime */
99 uint64_t write_duration_estimate; /* how long it took previously to write the time stamps in the packets */
100 uint64_t earliest_tsc_next_pkt;
101 uint64_t new_rate_bps;
102 uint64_t pkt_queue_index;
103 uint32_t n_pkts; /* number of packets in pcap */
104 uint32_t orig_n_pkts; /* number of packets in pcap */
105 uint32_t pkt_idx; /* current packet from pcap */
106 uint32_t pkt_count; /* how many pakets to generate */
107 uint32_t max_frame_size;
108 uint32_t runtime_flags;
110 uint16_t packet_id_pos;
115 uint8_t generator_id;
116 uint8_t n_rands; /* number of randoms */
117 uint8_t min_bulk_size;
118 uint8_t max_bulk_size;
120 uint8_t runtime_checksum_needed;
/* Per-random-field descriptors (used by task_gen_apply_random_fields). */
123 uint32_t rand_mask; /* since the random vals are uniform, masks don't introduce bias */
124 uint32_t fixed_bits; /* length of each random (max len = 4) */
125 uint16_t rand_offset; /* each random has an offset*/
126 uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
/* Ring buffer of TX accuracies, indexed modulo ACCURACY_WINDOW. */
128 uint64_t accur[ACCURACY_WINDOW];
129 uint64_t pkt_tsc_offset[64];
130 struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
131 prox_rte_ether_addr src_mac;
133 uint8_t cksum_offload;
134 struct prox_port_cfg *port;
135 uint64_t *bytes_to_tsc; /* lookup table: wire bytes -> tsc cycles at configured rate */
136 uint32_t imix_pkt_sizes[MAX_IMIX_PKTS];
137 uint32_t imix_nb_pkts;
138 uint32_t new_imix_nb_pkts; /* pending imix reconfiguration, applied by task_gen_update_config */
139 } __rte_cache_aligned;
/* Forward declarations for helpers defined later in this file. */
141 static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes);
142 static void task_gen_reset_pkt_templates_content(struct task_gen *task);
143 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task);
144 static int check_all_pkt_size(struct task_gen *task, int do_panic);
145 static int check_all_fields_in_bounds(struct task_gen *task, int do_panic);
/* Return the IPv4 header length in bytes from the version/IHL byte.
   Fast path for the optionless 0x45 case; warns on a non-IPv4 version.
   NOTE(review): the early-return for the warning branch and closing braces
   appear lost in extraction. */
147 static inline uint8_t ipv4_get_hdr_len(prox_rte_ipv4_hdr *ip)
149 /* Optimize for common case of IPv4 header without options. */
150 if (ip->version_ihl == 0x45)
151 return sizeof(prox_rte_ipv4_hdr);
152 if (unlikely(ip->version_ihl >> 4 != 4)) {
153 plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
156 return (ip->version_ihl & 0xF) * 4; /* IHL is in 32-bit words */
/* Walk the L2 headers of a packet (including stacked VLAN tags) to compute
   the L2 length, then derive the L3 (IPv4) header length when applicable.
   NOTE(review): the switch cases between lines 178 and 192 are missing in
   this view, so the full set of supported ether types cannot be confirmed. */
159 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
161 *l2_len = sizeof(prox_rte_ether_hdr);
163 prox_rte_vlan_hdr *vlan_hdr;
164 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
165 prox_rte_ipv4_hdr *ip;
166 uint16_t ether_type = eth_hdr->ether_type;
/* Skip over 802.1ad / 802.1Q tags, bounded by the packet length. */
169 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(prox_rte_vlan_hdr) < len)) {
170 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + *l2_len);
172 ether_type = vlan_hdr->eth_proto;
175 // No L3 cksum offload for IPv6, but TODO L4 offload
176 // ETYPE_EoGRE CRC not implemented yet
178 switch (ether_type) {
192 plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
/* If an L3 header follows, compute its length only for IPv4. */
197 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len);
198 if (ip->version_ihl >> 4 == 4)
199 *l3_len = ipv4_get_hdr_len(ip);
/* Recompute the L3/L4 checksums of a packet using the template's cached
   l2_len/l3_len. IPv4 uses the prox helper (optionally HW offload); IPv6
   recomputes the UDP/TCP checksum in software.
   NOTE(review): the "if IPv4" line introducing the first branch appears
   missing in this view. */
203 static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_template *pkt_template, int cksum_offload)
205 uint16_t l2_len = pkt_template->l2_len;
206 uint16_t l3_len = pkt_template->l3_len;
208 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len);
210 prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
211 } else if (ip->version_ihl >> 4 == 6) {
212 prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(hdr + l2_len);
213 if (ip6->proto == IPPROTO_UDP) {
214 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
/* Checksum field must be zero while computing the checksum. */
215 udp->dgram_cksum = 0;
216 udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
217 } else if (ip6->proto == IPPROTO_TCP) {
218 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
220 tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
/* Reprogram the token bucket with the (possibly new) rate in bytes per
   second and restart it from the current TSC with an empty bucket. */
225 static void task_gen_reset_token_time(struct task_gen *task)
227 token_time_set_bpp(&task->token_time, task->new_rate_bps);
228 token_time_reset(&task->token_time, rte_rdtsc(), 0);
/* Decrement the remaining packet budget by send_bulk. A pkt_count of
   (uint32_t)-1 means "unlimited" and is left untouched.
   NOTE(review): the early return and the clamp-to-zero else branch appear
   missing from this view. */
231 static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk)
233 if (task->pkt_count == (uint32_t)-1)
236 if (task->pkt_count >= send_bulk)
237 task->pkt_count -= send_bulk;
/* Main handler for the pcap replay task: determine how many of the next
   packets are due according to their recorded inter-packet times, take
   that many mbufs from the local pool, fill them from the templates and
   transmit them. Returns what tx_pkt returns.
   NOTE(review): several lines (loop increments, wrap-around handling,
   braces) are missing in this view. */
243 static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf, uint16_t n_pkts)
245 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
246 uint64_t now = rte_rdtsc();
247 uint64_t send_bulk = 0;
248 uint32_t pkt_idx_tmp = task->pkt_idx;
/* Wrapping past the end is only legal when looping is configured. */
250 if (pkt_idx_tmp == task->n_pkts) {
251 PROX_ASSERT(task->loop);
/* Count how many packets are due, up to a burst of 64. */
255 for (uint16_t j = 0; j < 64; ++j) {
256 uint64_t tsc = task->proto_tsc[pkt_idx_tmp];
257 if (task->last_tsc + tsc <= now) {
258 task->last_tsc += tsc;
261 if (pkt_idx_tmp == task->n_pkts) {
272 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
273 if (new_pkts == NULL)
276 for (uint16_t j = 0; j < send_bulk; ++j) {
277 struct rte_mbuf *next_pkt = new_pkts[j];
278 struct pkt_template *pkt_template = &task->proto[task->pkt_idx];
279 uint8_t *hdr = rte_pktmbuf_mtod(next_pkt, uint8_t *);
281 pkt_template_init_mbuf(pkt_template, next_pkt, hdr);
284 if (task->pkt_idx == task->n_pkts) {
292 return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
/* Convert a wire-size in bytes to a duration in TSC cycles at the
   configured rate, via the precomputed per-byte-count lookup table. */
295 static inline uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
297 return task->bytes_to_tsc[bytes];
/* Return the template index following pkt_idx, wrapping to 0 at n_pkts. */
300 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
302 return pkt_idx + 1 >= task->n_pkts? 0 : pkt_idx + 1;
/* Return the template index "offset" positions after the current one,
   modulo the number of templates. Callers pass a negative offset (as an
   unsigned) to step backwards. */
305 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
307 return (task->pkt_idx + offset) % task->n_pkts;
/* Compute how many packets may be sent now, limited by max_bulk_size, the
   remaining pkt_count budget and the token-bucket byte credit. Returns the
   bulk size (0 if below min_bulk) and the wire bytes it would consume via
   *total_bytes.
   NOTE(review): loop-exit/return lines appear missing in this view. */
310 static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *total_bytes)
312 /* The biggest bulk we allow to send is task->max_bulk_size
313 packets. The max bulk size can also be limited by the
314 pkt_count field. At the same time, we are rate limiting
315 based on the specified speed (in bytes per second) so token
316 bucket based rate limiting must also be applied. The
317 minimum bulk size is also constrained. If the calculated
318 bulk size is less then the minimum, then don't send
321 const uint32_t min_bulk = task->min_bulk_size;
322 uint32_t max_bulk = task->max_bulk_size;
324 if (task->pkt_count != (uint32_t)-1 && task->pkt_count < max_bulk) {
325 max_bulk = task->pkt_count;
328 uint32_t send_bulk = 0;
329 uint32_t pkt_idx_tmp = task->pkt_idx;
330 uint32_t would_send_bytes = 0;
334 * TODO - this must be improved to take into account the fact that, after applying randoms
335 * The packet can be replaced by an ARP
337 for (uint16_t j = 0; j < max_bulk; ++j) {
338 struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp];
339 pkt_size = pktpl->len;
340 uint32_t pkt_len = pkt_len_to_wire_size(pkt_size);
/* Stop as soon as the token bucket cannot cover the next packet. */
341 if (pkt_len + would_send_bytes > task->token_time.bytes_now)
344 pkt_idx_tmp = task_gen_next_pkt_idx(task, pkt_idx_tmp);
347 would_send_bytes += pkt_len;
350 if (send_bulk < min_bulk)
352 *total_bytes = would_send_bytes;
/* Overwrite each configured random field of one packet: draw a 32-bit
   random, apply mask and fixed bits, byte-swap to big-endian and copy the
   low-order rand_len bytes to the field's offset in the packet. */
356 static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr)
358 uint32_t ret, ret_tmp;
360 for (uint16_t i = 0; i < task->n_rands; ++i) {
361 ret = random_next(&task->rand[i].state);
362 ret_tmp = (ret & task->rand[i].rand_mask) | task->rand[i].fixed_bits;
364 ret_tmp = rte_bswap32(ret_tmp);
365 /* At this point, the lower order bytes (BE) contain
366 the generated value. The address where the values
367 of interest starts is at ret_tmp + 4 - rand_len. */
368 uint8_t *pret_tmp = (uint8_t*)&ret_tmp;
369 rte_memcpy(hdr + task->rand[i].rand_offset, pret_tmp + 4 - task->rand[i].rand_len, task->rand[i].rand_len);
/* Apply the configured random fields to each packet header in the bulk. */
373 static void task_gen_apply_all_random_fields(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
378 for (uint16_t i = 0; i < count; ++i)
379 task_gen_apply_random_fields(task, pkt_hdr[i]);
/* Write the 32-bit accuracy value into a packet at the configured offset. */
382 static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
384 *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
/* Write the generator signature into a packet template at sig_pos.
   NOTE(review): the guard checking that sig_pos is configured appears
   missing from this view. */
387 static void task_gen_apply_sig(struct task_gen *task, struct pkt_template *dst)
390 *(uint32_t *)(dst->buf + task->sig_pos) = task->sig;
/* Write into each outgoing packet the accuracy measured ACCURACY_WINDOW
   packets earlier, taken from the accur[] ring buffer. No-op when the
   accuracy position is not configured. */
393 static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
395 if (!task->accur_pos)
398 /* The accuracy of task->pkt_queue_index - ACCURACY_WINDOW is stored in
399 packet task->pkt_queue_index. The ID modulo ACCURACY_WINDOW is the
401 for (uint16_t j = 0; j < count; ++j) {
402 uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & (ACCURACY_WINDOW - 1)];
403 task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
/* Write a unique packet id at the configured position in the packet.
   NOTE(review): the statement copying *id into *dst appears missing in
   this view. */
407 static void task_gen_apply_unique_id(struct task_gen *task, uint8_t *pkt_hdr, const struct unique_id *id)
409 struct unique_id *dst = (struct unique_id *)(pkt_hdr + task->packet_id_pos);
/* Stamp each packet of the bulk with a monotonically increasing unique id
   (generator id + running pkt_queue_index). No-op when packet_id_pos is
   not configured. */
414 static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
416 if (!task->packet_id_pos)
419 for (uint16_t i = 0; i < count; ++i) {
421 unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
422 task_gen_apply_unique_id(task, pkt_hdr[i], &id);
/* Recompute checksums of the packets just built. Walks the template
   indices backwards from the current pkt_idx (which was already advanced
   by task_gen_build_packets), hence the -count offset. Skipped when the
   TX CRC flag is off or no template needs runtime checksumming. */
426 static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
428 if (!(task->runtime_flags & TASK_TX_CRC))
431 if (!task->runtime_checksum_needed)
434 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
435 for (uint16_t i = 0; i < count; ++i) {
436 struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
437 checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
438 pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
/* Deduct the sent bytes from the token bucket. If a full max-size burst
   was sent we cannot keep up with the configured rate, so cap the credit
   instead of letting it accumulate (leaving a gap on the wire). */
442 static void task_gen_consume_tokens(struct task_gen *task, uint32_t tokens, uint32_t send_count)
444 /* If max burst has been sent, we can't keep up so just assume
445 that we can (leaving a "gap" in the packet stream on the
447 task->token_time.bytes_now -= tokens;
448 if (send_count == task->max_bulk_size && task->token_time.bytes_now > tokens) {
449 task->token_time.bytes_now = tokens;
/* Duration (in TSC cycles) of the bulk just prepared: the tsc offset of
   its last packet plus, unless NO_EXTRAPOLATION is set, the transmit time
   of that last packet itself. */
453 static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t count)
455 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
456 struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
457 uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
458 #ifdef NO_EXTRAPOLATION
459 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1];
461 uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
462 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
465 return bulk_duration;
/* Stamp an estimated transmit time into each packet (at lat_pos) so the
   receiving latency task can compute one-way latency. The estimate is the
   current TSC plus the previously measured stamp-writing duration, offset
   by delta_t to keep time stamps monotonically increasing across delayed
   bulks. Busy-waits until the estimated tx time is reached and returns the
   TSC read just before transmission (0 when latency is disabled).
   NOTE(review): some lines (early return, delta_t initialization, #else/
   #endif pairs) appear missing in this view. */
468 static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
470 if (!task->lat_enabled)
473 uint64_t tx_tsc, delta_t;
474 uint64_t tsc_before_tx = 0;
476 /* Just before sending the packets, apply the time stamp
477 relative to when the first packet will be sent. The first
478 packet will be sent now. The time is read for each packet
479 to reduce the error towards the actual time the packet will
481 uint64_t write_tsc_after, write_tsc_before;
483 write_tsc_before = rte_rdtsc();
485 /* The time it took previously to write the time stamps in the
486 packets is used as an estimate for how long it will take to
487 write the time stamps now. The estimated time at which the
488 packets will actually be sent will be at tx_tsc. */
489 tx_tsc = write_tsc_before + task->write_duration_estimate;
491 /* The offset delta_t tracks the difference between the actual
492 time and the time written in the packets. Adding the offset
493 to the actual time insures that the time written in the
494 packets is monotonically increasing. At the same time,
495 simply sleeping until delta_t is zero would leave a period
496 of silence on the line. The error has been introduced
497 earlier, but the packets have already been sent. */
499 /* This happens typically if previous bulk was delayed
500 by an interrupt e.g. (with Time in nsec)
501 Time x: sleep 4 microsec
502 Time x+4000: send 64 packets (64 packets as 4000 nsec, w/ 10Gbps 64 bytes)
503 Time x+5000: send 16 packets (16 packets as 1000 nsec)
504 When we send the 16 packets, the 64 ealier packets are not yet
506 if (tx_tsc < task->earliest_tsc_next_pkt)
507 delta_t = task->earliest_tsc_next_pkt - tx_tsc;
/* Write the per-packet time stamp, truncated by LATENCY_ACCURACY. */
511 for (uint16_t i = 0; i < count; ++i) {
512 uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
513 const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
514 *pos = pkt_tsc >> LATENCY_ACCURACY;
517 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
518 task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
519 write_tsc_after = rte_rdtsc();
520 task->write_duration_estimate = write_tsc_after - write_tsc_before;
522 /* Make sure that the time stamps that were written
523 are valid. The offset must be taken into account */
525 tsc_before_tx = rte_rdtsc();
526 } while (tsc_before_tx < tx_tsc);
528 return tsc_before_tx;
/* Record how long the actual transmission took compared to the estimate
   (rdtsc() - tsc_before_tx) into the accur[] ring buffer slots of the
   packets just sent, so it can be embedded in packets one window later. */
531 static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint64_t tsc_before_tx)
533 if (!task->accur_pos)
536 uint64_t accur = rte_rdtsc() - tsc_before_tx;
537 uint64_t first_accuracy_idx = task->pkt_queue_index - count;
539 for (uint32_t i = 0; i < count; ++i) {
540 uint32_t accuracy_idx = (first_accuracy_idx + i) & (ACCURACY_WINDOW - 1);
542 task->accur[accuracy_idx] = accur;
/* Prefetch the mbuf structs, resolve each packet's data pointer into
   pkt_hdr[], then prefetch the packet data itself — three separate loops
   so the prefetches have time to complete before the data is used. */
546 static void task_gen_load_and_prefetch(struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
548 for (uint16_t i = 0; i < count; ++i)
549 rte_prefetch0(mbufs[i]);
550 for (uint16_t i = 0; i < count; ++i)
551 pkt_hdr[i] = rte_pktmbuf_mtod(mbufs[i], uint8_t *);
552 for (uint16_t i = 0; i < count; ++i)
553 rte_prefetch0(pkt_hdr[i]);
/* Fill each mbuf of the bulk from the current template, advance pkt_idx,
   and (when latency is enabled) record each packet's tsc offset within the
   bulk, derived from the cumulative wire bytes sent before it.
   NOTE(review): pktpl and pkt_template alias the same template here; pktpl
   is presumably used by lines missing from this view (e.g. src MAC
   overwrite) — confirm against the full source. */
556 static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
558 uint64_t will_send_bytes = 0;
560 for (uint16_t i = 0; i < count; ++i) {
561 struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
562 struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
563 pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
564 prox_rte_ether_hdr *hdr = (prox_rte_ether_hdr *)pkt_hdr[i];
565 if (task->lat_enabled) {
566 #ifdef NO_EXTRAPOLATION
567 task->pkt_tsc_offset[i] = 0;
569 task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
571 will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
573 task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
/* Allocate the runtime and original template arrays plus a max_frame_size
   buffer for every template, on the task's NUMA socket. On failure either
   panics (do_panic) or logs an error; the pcap flag only selects the error
   message wording. */
577 static int task_gen_allocate_templates(struct task_gen *task, uint32_t orig_nb_pkts, uint32_t nb_pkts, int do_panic, int pcap)
579 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
580 size_t orig_mem_size = orig_nb_pkts * sizeof(*task->pkt_template);
581 task->pkt_template = prox_zmalloc(mem_size, task->socket_id);
582 task->pkt_template_orig = prox_zmalloc(orig_mem_size, task->socket_id);
584 if (task->pkt_template == NULL || task->pkt_template_orig == NULL) {
585 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for %s\n", mem_size, pcap ? "pcap file":"packet template");
589 for (size_t i = 0; i < orig_nb_pkts; i++) {
590 task->pkt_template_orig[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
591 if (task->pkt_template_orig[i].buf == NULL) {
592 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
596 for (size_t i = 0; i < nb_pkts; i++) {
597 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
598 if (task->pkt_template[i].buf == NULL) {
599 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
/* Resize the runtime template array for a new IMIX configuration: free the
   per-template buffers beyond the new count, realloc the array itself, and
   allocate buffers for any newly added templates. Uses rte_realloc_socket
   so existing template entries are preserved. */
606 static int task_gen_reallocate_templates(struct task_gen *task, uint32_t nb_pkts, int do_panic)
608 // Need to free up bufs allocated in previous (longer) imix
609 for (size_t i = nb_pkts; i < task->n_pkts; i++) {
610 if (task->pkt_template[i].buf) {
611 rte_free(task->pkt_template[i].buf);
612 task->pkt_template[i].buf = NULL;
616 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
617 struct pkt_template *ptr;
618 // re-allocate memory for new pkt_template (this might allocate additional memory or free up some...)
619 if ((ptr = rte_realloc_socket(task->pkt_template, mem_size, RTE_CACHE_LINE_SIZE, task->socket_id)) != NULL) {
620 task->pkt_template = ptr;
622 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for packet template for IMIX\n", mem_size);
626 // Need to allocate bufs for new template but no need to reallocate for existing ones
627 for (size_t i = task->n_pkts; i < nb_pkts; ++i) {
628 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
629 if (task->pkt_template[i].buf == NULL) {
630 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for packet %zd in IMIX\n", task->max_frame_size, i);
/* Validate a template length against the minimum (eth + ipv4 headers) and
   the configured max frame size. With do_panic set, failures abort the
   process; otherwise they are logged and an error is returned. */
637 static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
639 const uint16_t min_len = sizeof(prox_rte_ether_hdr) + sizeof(prox_rte_ipv4_hdr);
640 const uint16_t max_len = task->max_frame_size;
643 PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
644 PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
645 PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
649 plog_err("Invalid packet size length (no packet defined?)\n");
652 if (pkt_size > max_len) {
/* Distinguish "needs jumbo frames" from a plain out-of-range size. */
653 if (pkt_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE - 4)
654 plog_err("pkt_size too high and jumbo frames disabled\n");
656 plog_err("pkt_size out of range (must be <= (mtu=%u))\n", max_len);
659 if (pkt_size < min_len) {
660 plog_err("pkt_size out of range (must be >= %u)\n", min_len);
/* Check that the configured latency, packet-id and accuracy write offsets
   all fit inside a packet of pkt_size bytes. Panics or logs depending on
   do_panic, mirroring check_pkt_size. */
667 static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
669 if (task->lat_enabled) {
670 uint32_t pos_beg = task->lat_pos;
671 uint32_t pos_end = task->lat_pos + 3U;
674 PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
675 pos_beg, pos_end, pkt_size);
676 else if (pkt_size <= pos_end) {
677 plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
681 if (task->packet_id_pos) {
682 uint32_t pos_beg = task->packet_id_pos;
683 uint32_t pos_end = task->packet_id_pos + 4U;
686 PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
687 pos_beg, pos_end, pkt_size);
688 else if (pkt_size <= pos_end) {
689 plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
693 if (task->accur_pos) {
694 uint32_t pos_beg = task->accur_pos;
695 uint32_t pos_end = task->accur_pos + 3U;
698 PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u-%u, but packet size is %u bytes\n",
699 pos_beg, pos_end, pkt_size);
700 else if (pkt_size <= pos_end) {
701 plog_err("Writing accuracy at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
/* After an IMIX resize, fix up every template's IP total_length and L4
   length fields, and recompute the IP and UDP/TCP software checksums.
   Template k = j * n_orig_pkts + i is original template i resized to
   pkt_sizes[j]. Templates with l2_len == 0 (non-IP) are skipped. */
708 static int task_gen_set_eth_ip_udp_sizes(struct task_gen *task, uint32_t n_orig_pkts, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
712 prox_rte_ipv4_hdr *ip;
713 struct pkt_template *template;
715 for (size_t j = 0; j < nb_pkt_sizes; ++j) {
716 for (size_t i = 0; i < n_orig_pkts; ++i) {
717 k = j * n_orig_pkts + i;
718 template = &task->pkt_template[k];
719 if (template->l2_len == 0)
721 ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
722 ip->total_length = rte_bswap16(pkt_sizes[j] - template->l2_len);
723 l4_len = pkt_sizes[j] - template->l2_len - template->l3_len;
724 ip->hdr_checksum = 0;
725 prox_ip_cksum_sw(ip);
727 if (ip->next_proto_id == IPPROTO_UDP) {
728 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
729 udp->dgram_len = rte_bswap16(l4_len);
730 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
731 } else if (ip->next_proto_id == IPPROTO_TCP) {
732 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
733 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
/* Apply a pending IMIX reconfiguration: reallocate templates to
   imix_nb_pkts * orig_n_pkts entries, regenerate their lengths/contents/
   metadata, re-validate sizes and field bounds, and fix up IP/L4 headers. */
740 static int task_gen_apply_imix(struct task_gen *task, int do_panic)
742 struct pkt_template *ptr;
744 task->imix_nb_pkts = task->new_imix_nb_pkts;
745 uint32_t n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
747 if ((n_pkts != task->n_pkts) && ((rc = task_gen_reallocate_templates(task, n_pkts, do_panic)) < 0))
750 task->n_pkts = n_pkts;
/* Keep pkt_idx in range after shrinking the template set. */
751 if (task->pkt_idx >= n_pkts)
753 task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
754 task_gen_reset_pkt_templates_content(task);
755 task_gen_pkt_template_recalc_metadata(task);
756 check_all_pkt_size(task, DO_NOT_PANIC);
757 check_all_fields_in_bounds(task, DO_NOT_PANIC);
758 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Apply pending runtime configuration changes at the top of each handler
   invocation: rate change (token bucket reset) and IMIX change. */
762 static void task_gen_update_config(struct task_gen *task)
764 if (task->token_time.cfg.bpp != task->new_rate_bps)
765 task_gen_reset_token_time(task);
766 if (task->new_imix_nb_pkts)
767 task_gen_apply_imix(task, DO_NOT_PANIC);
768 task->new_imix_nb_pkts = 0;
/* Recursively enumerate every IPv4 address reachable through the random
   mask (each set mask bit doubles the candidates: bit clear and bit set)
   and register each value | fixed_bits with the master/control plane task.
   NOTE(review): the recursion guard testing the mask appears missing from
   this view. */
771 static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
773 struct task_base *tbase = (struct task_base *)task;
775 build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits);
777 build_value(task, mask >> 1, bit_pos + 1, val | (1 << bit_pos), fixed_bits);
780 register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
/* IPv6 variant of build_value: enumerate the (up to 32) random bits and
   register each resulting address with the control plane. var_bit_pos
   walks the random mask; init_var_bit_pos is the bit offset of the random
   field inside the address. */
784 static inline void build_value_ipv6(struct task_gen *task, uint32_t mask, int var_bit_pos, int init_var_bit_pos, struct ipv6_addr val, struct ipv6_addr fixed_bits)
786 struct task_base *tbase = (struct task_base *)task;
787 if (var_bit_pos < 32) {
788 build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
790 int byte_pos = (var_bit_pos + init_var_bit_pos) / 8;
791 int bit_pos = (var_bit_pos + init_var_bit_pos) % 8;
792 val.bytes[byte_pos] = val.bytes[byte_pos] | (1 << bit_pos);
793 build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
/* Recursion done: merge in the fixed bits and register the address. */
796 for (uint i = 0; i < sizeof(struct ipv6_addr) / 8; i++)
797 val.bytes[i] = val.bytes[i] | fixed_bits.bytes[i];
798 register_node_to_ctrl_plane(tbase->l3.tmaster, &null_addr, &val, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
/* For every packet template, locate the IP source address (IPv4 or IPv6,
   skipping VLAN/MPLS encapsulation) and register it with the control-plane
   (master) task so ARP/NDP can be answered. When a configured random field
   overlaps the source address, enumerate and register all addresses the
   random can produce (build_value / build_value_ipv6).
   NOTE(review): multiple lines (ipv4 flag assignments, loop bodies,
   continue/break statements, braces) are missing from this view. */
802 static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
804 struct task_base *tbase = (struct task_base *)task;
807 uint32_t mask, ip_len;
808 struct ipv6_addr *ip6_src = NULL;
811 for (uint32_t i = 0; i < task->n_pkts; ++i) {
812 struct pkt_template *pktpl = &task->pkt_template[i];
813 unsigned int ip_src_pos = 0;
815 unsigned int l2_len = sizeof(prox_rte_ether_hdr);
817 uint8_t *pkt = pktpl->buf;
818 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
819 uint16_t ether_type = eth_hdr->ether_type;
820 prox_rte_vlan_hdr *vlan_hdr;
821 prox_rte_ipv4_hdr *ip;
/* Skip stacked VLAN tags, as in parse_l2_l3_len. */
824 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) {
825 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + l2_len);
827 ether_type = vlan_hdr->eth_proto;
829 if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
831 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
832 if (ip->version_ihl >> 4 == 4)
834 else if (ip->version_ihl >> 4 != 6) // Version field at same location for IPv4 and IPv6
836 } else if (ether_type == ETYPE_IPv4) {
837 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
838 PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4); // Invalid Packet
840 } else if (ether_type == ETYPE_IPv6) {
841 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
842 PROX_PANIC(ip->version_ihl >> 4 != 6, "IPv6 ether_type but IP version = %d != 6", ip->version_ihl >> 4); // Invalid Packet
/* L3/NDP mode must match the template's IP version. */
847 PROX_PANIC(ipv4 && ((prox_cfg.flags & DSF_L3_ENABLED) == 0), "Trying to generate an IPv4 packet in NDP mode => not supported\n");
848 PROX_PANIC((ipv4 == 0) && ((prox_cfg.flags & DSF_NDP_ENABLED) == 0), "Trying to generate an IPv6 packet in L3 (IPv4) mode => not supported\n");
850 // Even if IPv4 header contains options, options are after ip src and dst
851 ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t);
852 ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
853 plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
854 register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
855 ip_len = sizeof(uint32_t);
857 ip_src_pos = l2_len + sizeof(prox_rte_ipv6_hdr) - 2 * sizeof(struct ipv6_addr);
858 ip6_src = ((struct ipv6_addr *)(pktpl->buf + ip_src_pos));
859 plog_info("\tip_src_pos = %d, ip6_src = "IPv6_BYTES_FMT"\n", ip_src_pos, IPv6_BYTES(ip6_src->bytes));
860 register_node_to_ctrl_plane(tbase->l3.tmaster, ip6_src, &null_addr, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
861 ip_len = sizeof(struct ipv6_addr);
/* Now handle random fields that overlap the source address. */
864 for (int j = 0; j < task->n_rands; j++) {
865 offset = task->rand[j].rand_offset;
866 len = task->rand[j].rand_len;
867 mask = task->rand[j].rand_mask;
868 fixed = task->rand[j].fixed_bits;
869 plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
870 if (offset >= ip_src_pos + ip_len) // First random bit after IP
872 if (offset + len < ip_src_pos) // Last random bit before IP
876 if (offset >= ip_src_pos) {
877 int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1;
878 mask = mask & ip_src_mask;
879 fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask);
880 build_value(task, mask, 0, 0, fixed);
882 int32_t bits = ((ip_src_pos + 4 - offset - len) * 8);
884 fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
885 build_value(task, mask, 0, 0, fixed);
888 // We do not support when random partially covers IP - either starting before or finishing after
889 if (offset + len >= ip_src_pos + ip_len) { // len over the ip
890 plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
893 if (offset < ip_src_pos) {
894 plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
897 // Even for IPv6 the random mask supported by PROX are 32 bits only
898 struct ipv6_addr fixed_ipv6;
899 uint init_var_byte_pos = (offset - ip_src_pos);
900 for (uint i = 0; i < sizeof(struct ipv6_addr); i++) {
901 if (i < init_var_byte_pos)
902 fixed_ipv6.bytes[i] = ip6_src->bytes[i];
903 else if (i < init_var_byte_pos + len)
904 fixed_ipv6.bytes[i] = (fixed >> (i - init_var_byte_pos)) & 0xFF;
906 fixed_ipv6.bytes[i] = ip6_src->bytes[i];
908 build_value_ipv6(task, mask, 0, init_var_byte_pos * 8, null_addr, fixed_ipv6);
/* Main handler of the packet generator task: apply pending config, update
   the token bucket, compute the due bulk size, build/stamp/checksum the
   packets and transmit them. On partial transmit, rewind the unique-id
   counter and the next-packet estimate so receivers don't see gaps.
   NOTE(review): early-return lines and some braces are missing from this
   view. */
914 static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
916 struct task_gen *task = (struct task_gen *)tbase;
917 uint8_t out[MAX_PKT_BURST] = {0};
922 task_gen_update_config(task);
/* pkt_count reaching 0 means the budget was exhausted: reset rate. */
924 if (task->pkt_count == 0) {
925 task_gen_reset_token_time(task);
928 if (!task->token_time.cfg.bpp)
931 token_time_update(&task->token_time, rte_rdtsc());
933 uint32_t would_send_bytes;
934 uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
938 task_gen_take_count(task, send_bulk);
939 task_gen_consume_tokens(task, would_send_bytes, send_bulk);
941 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
942 if (new_pkts == NULL)
944 uint8_t *pkt_hdr[MAX_RING_BURST];
946 task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
947 task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
948 task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
949 task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
950 task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
952 uint64_t tsc_before_tx;
954 tsc_before_tx = task_gen_write_latency(task, pkt_hdr, send_bulk);
955 task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
956 ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
957 task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
959 // If we failed to send some packets, we need to do some clean-up:
962 // We need re-use the packets indexes not being sent
963 // Hence non-sent packets will not be considered as lost by the receiver when it looks at
964 // packet ids. This should also increase the percentage of packets used for latency measurements
965 task->pkt_queue_index -= ret;
967 // In case of failures, the estimate about when we can send next packet (earliest_tsc_next_pkt) is wrong
968 // This would result in under-estimated latency (up to 0 or negative)
969 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, ret);
970 task->earliest_tsc_next_pkt -= bulk_duration;
/* Seed the PRNG state of every random-field slot of the task. */
975 static void init_task_gen_seeds(struct task_gen *task)
977 for (size_t i = 0; i < sizeof(task->rand)/sizeof(task->rand[0]); ++i)
978 random_init_seed(&task->rand[i].state);
/* Scan a pcap file once to count its packets and find the largest frame,
   then rewind the file position to the first packet so it can be re-read.
   NOTE(review): the counter increment and return statement appear missing
   from this view. */
981 static uint32_t pcap_count_pkts(pcap_t *handle, uint32_t *max_frame_size)
983 struct pcap_pkthdr header;
987 long pkt1_fpos = ftell(pcap_file(handle));
989 while ((buf = pcap_next(handle, &header))) {
990 if (header.len > *max_frame_size)
991 *max_frame_size = header.len;
994 int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
995 PROX_PANIC(ret2 != 0, "Failed to reset reading pcap file\n");
/* Average n inter-packet time stamps (TSC cycles), rounding to nearest.
 * Used by pcap_read_pkts() to synthesize the wrap-around gap between the
 * last and the first packet of a looping pcap.
 *
 * time_stamp: array of at least n inter-packet durations.
 * n:          number of entries to average; 0 yields 0 (the caller passes
 *             n_pkts - 1, which is 0 for a single-packet pcap — the
 *             unguarded original divided by zero in that case).
 */
static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
{
	uint64_t tot_inter_pkt = 0;

	if (n == 0)
		return 0;
	for (uint32_t i = 0; i < n; ++i)
		tot_inter_pkt += time_stamp[i];
	return (tot_inter_pkt + n / 2)/n;
}
/* Read n_pkts packets from a pcap file into the proto[] templates,
   truncating frames larger than max_frame_size. When time_stamp is given,
   convert the capture times (relative to the first packet) to TSC values,
   then turn them into inter-packet gaps; the missing gap between last and
   first packet is filled with the average of the others.
   NOTE(review): several lines (timestamp base capture, braces, return)
   are missing from this view. */
1008 static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp, uint32_t max_frame_size)
1010 struct pcap_pkthdr header;
1014 for (uint32_t i = 0; i < n_pkts; ++i) {
1015 buf = pcap_next(handle, &header);
1017 PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
1018 proto[i].len = header.len;
1019 len = RTE_MIN(header.len, max_frame_size);
1020 if (header.len > len)
1021 plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
1024 static struct timeval beg;
1030 tv = tv_diff(&beg, &header.ts);
1031 tv_to_tsc(&tv, time_stamp + i);
1033 rte_memcpy(proto[i].buf, buf, len);
/* Convert absolute offsets to inter-packet gaps (back to front). */
1036 if (time_stamp && n_pkts) {
1037 for (uint32_t i = n_pkts - 1; i > 0; --i)
1038 time_stamp[i] -= time_stamp[i - 1];
1039 /* Since the handle function will loop the packets,
1040 there is one time-stamp that is not provided by the
1041 pcap file. This is the time between the last and
1042 the first packet. This implementation takes the
1043 average of the inter-packet times here. */
1045 time_stamp[0] = avg_time_stamp(time_stamp + 1, n_pkts - 1);
/* Validate the length of every packet template.
 * @param do_panic  when set, check_pkt_size panics instead of returning
 * @return 0 when all templates pass; otherwise the first non-zero rc
 *         from check_pkt_size.
 */
static int check_all_pkt_size(struct task_gen *task, int do_panic)
	for (uint32_t i = 0; i < task->n_pkts;++i) {
		if ((rc = check_pkt_size(task, task->pkt_template[i].len, do_panic)) != 0)
/* Verify that configured field offsets (lat/accur/packet-id/randoms)
 * fit within each template's packet length.
 * @param do_panic  when set, the per-template check panics on failure
 * @return 0 when all templates pass; otherwise the first non-zero rc.
 */
static int check_all_fields_in_bounds(struct task_gen *task, int do_panic)
	for (uint32_t i = 0; i < task->n_pkts;++i) {
		if ((rc = check_fields_in_bounds(task, task->pkt_template[i].len, do_panic)) != 0)
/* Re-derive the cached L2/L3 header lengths of every template by
 * parsing its current buffer contents. Must run after any change to a
 * template's bytes or length. */
static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
	struct pkt_template *template;

	for (size_t i = 0; i < task->n_pkts; ++i) {
		template = &task->pkt_template[i];
		parse_l2_l3_len(template->buf, &template->l2_len, &template->l3_len, template->len);
/* Pre-compute IP and L4 checksums in every template, and decide whether
 * checksums must still be recomputed per packet at runtime.
 * Template bytes are static, so checksums can be fixed once here —
 * unless latency / accuracy / packet-id fields are written into the
 * payload at send time, in which case runtime_checksum_needed is set so
 * the datapath redoes them. Templates with l2_len == 0 (no parsed L3
 * header) are skipped. */
static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
	struct pkt_template *template;
	prox_rte_ipv4_hdr *ip;

	task->runtime_checksum_needed = 0;
	for (size_t i = 0; i < task->n_pkts; ++i) {
		template = &task->pkt_template[i];
		if (template->l2_len == 0)
		ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
		/* High nibble of version_ihl distinguishes IPv4 from IPv6. */
		if (ip->version_ihl >> 4 == 4) {
			ip->hdr_checksum = 0;
			prox_ip_cksum_sw(ip);
			/* L4 payload length = total_length (host order) - IP header. */
			uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
			if (ip->next_proto_id == IPPROTO_UDP) {
				prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
				prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
			} else if (ip->next_proto_id == IPPROTO_TCP) {
				prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
				prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
		} else if (ip->version_ihl >> 4 == 6) {
			prox_rte_ipv6_hdr *ip6;
			ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len);
			/* IPv6: L4 header sits directly after the fixed 40-byte header. */
			if (ip6->proto == IPPROTO_UDP) {
				prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
				udp->dgram_cksum = 0;
				udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
			} else if (ip6->proto == IPPROTO_TCP) {
				prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
				tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);

		/* The current implementation avoids checksum
		   calculation by determining that at packet
		   construction time, no fields are applied that would
		   require a recalculation of the checksum. */
		if (task->lat_enabled && task->lat_pos > template->l2_len)
			task->runtime_checksum_needed = 1;
		if (task->accur_pos > template->l2_len)
			task->runtime_checksum_needed = 1;
		if (task->packet_id_pos > template->l2_len)
			task->runtime_checksum_needed = 1;
/* Refresh all derived template state. Order matters: the checksum pass
 * reads the l2/l3 lengths that the metadata pass computes. */
static void task_gen_pkt_template_recalc_all(struct task_gen *task)
{
	task_gen_pkt_template_recalc_metadata(task);
	task_gen_pkt_template_recalc_checksum(task);
}
1136 static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes)
1138 struct pkt_template *src, *dst;
1140 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1141 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1142 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1143 dst->len = pkt_sizes[j];
1148 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
1150 struct pkt_template *src, *dst;
1152 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1153 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1154 src = &task->pkt_template_orig[i];
1155 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1156 dst->len = src->len;
/* Restore every runtime template's bytes from its pristine original,
 * then re-apply the per-task mutations: the port's source MAC (when the
 * task overwrites it) and the latency signature. */
static void task_gen_reset_pkt_templates_content(struct task_gen *task)
	struct pkt_template *src, *dst;

	for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
		for (size_t i = 0; i < task->orig_n_pkts; ++i) {
			src = &task->pkt_template_orig[i];
			dst = &task->pkt_template[j * task->orig_n_pkts + i];
			/* NOTE(review): RTE_MAX copies dst->len bytes even when the
			   original is shorter, pulling trailing bytes beyond src's
			   packet data; presumably both buffers are allocated at
			   max_frame_size so this cannot overflow — confirm against
			   task_gen_allocate_templates. */
			memcpy(dst->buf, src->buf, RTE_MAX(src->len, dst->len));
			if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
				/* Offset sizeof(ether_addr) == the Ethernet source-MAC field. */
				rte_memcpy(&dst->buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
			task_gen_apply_sig(task, dst);
/* Rebuild the runtime templates from the pristine originals: first the
 * lengths, then the payload bytes, finally the derived metadata and
 * checksums (which depend on both). */
static void task_gen_reset_pkt_templates(struct task_gen *task)
{
	task_gen_reset_pkt_templates_len(task);
	task_gen_reset_pkt_templates_content(task);
	task_gen_pkt_template_recalc_all(task);
}
/* Build the packet templates from an inline packet definition (pkt
 * inline / pkt_size in the config) instead of a pcap file. A single
 * original template is replicated once per IMIX size; sizes and field
 * offsets are then validated (panicking on error). */
static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
	task->orig_n_pkts = 1;
	if (task->imix_nb_pkts == 0) {
		/* No IMIX configured: a single size from the pkt_size parameter. */
		task->imix_pkt_sizes[0] = targ->pkt_size;
		task->n_pkts = task->imix_nb_pkts;
	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, NOT_FROM_PCAP);

	rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, task->max_frame_size);
	task->pkt_template_orig[0].len = task->imix_pkt_sizes[0];
	task_gen_reset_pkt_templates(task);
	check_all_pkt_size(task, DO_PANIC);
	check_all_fields_in_bounds(task, DO_PANIC);

	// If IMIX was not specified then pkt_size is specified using pkt_size parameter or the length of pkt_inline
	// In that case, for backward compatibility, we do NOT adapt the length of IP and UDP to the length of the packet
	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Build the packet templates from a pcap file: count the packets,
 * verify they fit the port MTU, allocate templates (replicated per IMIX
 * size when IMIX is configured), load the bytes and validate them. */
static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
	char err[PCAP_ERRBUF_SIZE];
	uint32_t max_frame_size;
	pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
	PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);

	task->orig_n_pkts = pcap_count_pkts(handle, &max_frame_size);
	plogx_info("%u packets in pcap file '%s'; max frame size=%d\n", task->orig_n_pkts, targ->pcap_file, max_frame_size);
	/* Pick the error message based on whether jumbo frames would have helped. */
	PROX_PANIC(max_frame_size > task->max_frame_size,
		max_frame_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE -4 ?
			"pkt_size too high and jumbo frames disabled" : "pkt_size > mtu");

	/* Honor the configured cap on how many pcap packets to use. */
	task->orig_n_pkts = RTE_MIN(task->orig_n_pkts, targ->n_pkts);
	if (task->imix_nb_pkts == 0) {
		task->n_pkts = task->orig_n_pkts;
		task->n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, FROM_PCAP);
	plogx_info("Loading %u packets from pcap\n", task->n_pkts);

	/* Time stamps are not used in this (non-pcap-replay) mode: pass NULL. */
	pcap_read_pkts(handle, targ->pcap_file, task->orig_n_pkts, task->pkt_template_orig, NULL, max_frame_size);

	task_gen_reset_pkt_templates(task);
	check_all_pkt_size(task, DO_PANIC);
	check_all_fields_in_bounds(task, DO_PANIC);
	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Create the mbuf mempool used by the generator, sized so that one mbuf
 * can hold max_frame_size bytes (growing past TX_MBUF_SIZE if needed).
 * Panics on allocation failure.
 * NOTE(review): the mempool name is a fixed static string; creating a
 * second pool with the same name presumably fails in DPDK — confirm the
 * multi-generator case is handled elsewhere. */
static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint16_t max_frame_size)
	static char name[] = "gen_pool";
	struct rte_mempool *ret;
	const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);

	uint32_t mbuf_size = TX_MBUF_SIZE;
	/* Grow the element size when the frame (plus mbuf header and headroom)
	   does not fit the default. */
	if (max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size)
		mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
	plog_info("\tCreating mempool with name '%s'\n", name);
	ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size,
				 targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
				 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
	PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
		   sock_id, targ->nb_mbuf - 1);

	plog_info("\tMempool %p size = %u * %u cache %u, socket %d\n", ret,
		  targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id);
1264 void task_gen_set_pkt_count(struct task_base *tbase, uint32_t count)
1266 struct task_gen *task = (struct task_gen *)tbase;
1268 task->pkt_count = count;
/* Apply a single new packet size to all templates.
 * Validates the size against every template first (two separate loops:
 * nothing is modified unless all checks pass), then writes the length.
 * @return 0 on success, the failing check's rc otherwise.
 */
int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
	struct task_gen *task = (struct task_gen *)tbase;

	/* Pass 1: validate only — avoids partially-applied sizes on error. */
	for (size_t i = 0; i < task->n_pkts; ++i) {
		if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
		if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
	/* Pass 2: all checks passed, apply the new length everywhere. */
	for (size_t i = 0; i < task->n_pkts; ++i) {
		task->pkt_template[i].len = pkt_size;
/* Stage a new IMIX size distribution; the datapath applies it when it
 * sees new_imix_nb_pkts set.
 * @param nb_pkt_sizes  number of entries in pkt_sizes
 * @param pkt_sizes     the packet sizes to validate and stage
 * @return 0 on success, the failing check's rc otherwise.
 * NOTE(review): imix_pkt_sizes is overwritten by the memcpy BEFORE the
 * sizes are validated, so a rejected request still clobbers the stored
 * sizes even though new_imix_nb_pkts stays unset — confirm intended.
 */
int task_gen_set_imix(struct task_base *tbase, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
	struct task_gen *task = (struct task_gen *)tbase;

	memcpy(task->imix_pkt_sizes, pkt_sizes, nb_pkt_sizes * sizeof(uint32_t));
	for (size_t i = 0; i < nb_pkt_sizes; ++i) {
		if ((rc = check_pkt_size(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
		if ((rc = check_fields_in_bounds(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
	// only set new_imix_nb_pkts if checks of pkt sizes succeeded
	task->new_imix_nb_pkts = nb_pkt_sizes;
1305 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
1307 struct task_gen *task = (struct task_gen *)tbase;
1309 task->new_rate_bps = bps;
/* Clear the configuration (mask, fixed bits, offset) of every random
 * field currently defined on this generator. */
void task_gen_reset_randoms(struct task_base *tbase)
	struct task_gen *task = (struct task_gen *)tbase;

	for (uint32_t i = 0; i < task->n_rands; ++i) {
		task->rand[i].rand_mask = 0;
		task->rand[i].fixed_bits = 0;
		task->rand[i].rand_offset = 0;
/* Write a fixed big-endian value of `len` bytes at `offset` into every
 * packet template, then recompute derived metadata and checksums.
 * @param value   value to write (converted to network byte order)
 * @param offset  byte offset into the packet buffer
 * @param len     number of bytes to write (most-significant bytes kept)
 * @return 0 on success; an error when offset+len exceeds max_frame_size.
 * NOTE(review): the shift (4 - len) * 8 assumes len <= 4 — a larger len
 * would be undefined behavior; confirm callers bound it.
 */
int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
	struct task_gen *task = (struct task_gen *)tbase;

	if (offset + len > task->max_frame_size)
	for (size_t i = 0; i < task->n_pkts; ++i) {
		/* Keep only the len most-significant bytes of the BE value.
		   (Loop-invariant; recomputed each iteration.) */
		uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
		uint8_t *dst = task->pkt_template[i].buf;

		rte_memcpy(dst + offset, &to_write, len);

	task_gen_pkt_template_recalc_all(task);
/* Discard all runtime modifications (set_value writes, randoms applied
 * to buffers) by restoring template contents from the originals, then
 * re-apply the port source MAC where the task overwrites it. */
void task_gen_reset_values(struct task_base *tbase)
	struct task_gen *task = (struct task_gen *)tbase;

	task_gen_reset_pkt_templates_content(task);
	if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
		for (uint32_t i = 0; i < task->n_pkts; ++i) {
			/* Offset sizeof(ether_addr) == the Ethernet source-MAC field. */
			rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
1354 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
1356 struct task_gen *task = (struct task_gen *)tbase;
1358 return task->n_rands;
1361 static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
1363 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1364 task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
1365 uint32_t max_frame_size;
1367 task->loop = targ->loop;
1369 task->hz = rte_get_tsc_hz();
1371 char err[PCAP_ERRBUF_SIZE];
1372 pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
1373 PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
1375 task->n_pkts = pcap_count_pkts(handle, &max_frame_size);
1376 plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
1378 task->local_mbuf.mempool = task_gen_create_mempool(targ, max_frame_size);
1380 PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
1383 plogx_info("Configured to load %u packets\n", targ->n_pkts);
1384 if (task->n_pkts > targ->n_pkts)
1385 task->n_pkts = targ->n_pkts;
1387 plogx_info("Loading %u packets from pcap\n", task->n_pkts);
1389 size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
1390 uint8_t *mem = prox_zmalloc(mem_size, task->socket_id);
1392 PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
1393 task->proto = (struct pkt_template *) mem;
1394 task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
1396 for (uint i = 0; i < targ->n_pkts; i++) {
1397 task->proto[i].buf = prox_zmalloc(max_frame_size, task->socket_id);
1398 PROX_PANIC(task->proto[i].buf == NULL, "Failed to allocate %u bytes (in huge pages) for pcap file\n", max_frame_size);
1401 pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc, max_frame_size);
/* Look up a configured random field by its packet offset.
 * @return the index of the matching entry, or UINT32_MAX when no random
 *         is configured at that offset (see caller task_gen_add_rand). */
static int task_gen_find_random_with_offset(struct task_gen *task, uint32_t offset)
	for (uint32_t i = 0; i < task->n_rands; ++i) {
		if (task->rand[i].rand_offset == offset) {
/* Add (or overwrite) a random field definition on this generator.
 * @param rand_str  textual random spec, parsed into mask/fixed/len
 * @param offset    byte offset of the randomized field in the packet
 * @param rand_id   explicit slot, or UINT32_MAX to append a new one
 * @return non-zero on error (too many randoms, unparsable spec).
 * Randomized bytes invalidate the precomputed checksums, hence
 * runtime_checksum_needed is forced on.
 * NOTE(review): the capacity check hardcodes 64 — confirm it matches
 * the declared size of task->rand. */
int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t offset, uint32_t rand_id)
	struct task_gen *task = (struct task_gen *)tbase;
	uint32_t existing_rand;

	if (rand_id == UINT32_MAX && task->n_rands == 64) {
		plog_err("Too many randoms\n");
	uint32_t mask, fixed, len;

	if (parse_random_str(&mask, &fixed, &len, rand_str)) {
		plog_err("%s\n", get_parse_err());
	task->runtime_checksum_needed = 1;

	/* A random already configured at this offset is overwritten in place. */
	existing_rand = task_gen_find_random_with_offset(task, offset);
	if (existing_rand != UINT32_MAX) {
		plog_warn("Random at offset %d already set => overwriting len = %d %s\n", offset, len, rand_str);
		rand_id = existing_rand;
		task->rand[rand_id].rand_len = len;
		task->rand[rand_id].rand_offset = offset;
		task->rand[rand_id].rand_mask = mask;
		task->rand[rand_id].fixed_bits = fixed;

	/* Otherwise append to the next free slot. */
	task->rand[task->n_rands].rand_len = len;
	task->rand[task->n_rands].rand_offset = offset;
	task->rand[task->n_rands].rand_mask = mask;
	task->rand[task->n_rands].fixed_bits = fixed;
/* Task start hook: reset the packet-id sequence and the token-bucket
 * rate limiter, and (in l3 sub-mode) register our IPs with the master
 * task's control plane so ARP can be handled. */
static void start(struct task_base *tbase)
	struct task_gen *task = (struct task_gen *)tbase;
	task->pkt_queue_index = 0;

	task_gen_reset_token_time(task);
	if (tbase->l3.tmaster) {
		register_all_ip_to_ctrl_plane(task);

	/* TODO(open issue, kept from original):
	   Handle the case when two tasks transmit to the same port
	   and one of them is stopped. In that case ARP (requests or replies)
	   might not be sent. Master will have to keep a list of rings.
	   stop will have to de-register IP from ctrl plane.
	   un-registration will remove the ring. when having more than
	   one active rings, master can always use the first one */
/* Pcap-replay start hook: prime last_tsc so the datapath's schedule
 * (last_tsc + proto_tsc[0]) fires immediately on the first iteration. */
static void start_pcap(struct task_base *tbase)
	struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
	/* When we start, the first packet is sent immediately. */
	task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
1481 static void init_task_gen_early(struct task_args *targ)
1483 uint8_t *generator_count = prox_sh_find_system("generator_count");
1485 if (generator_count == NULL) {
1486 generator_count = prox_zmalloc(sizeof(*generator_count), rte_lcore_to_socket_id(targ->lconf->id));
1487 PROX_PANIC(generator_count == NULL, "Failed to allocate generator count\n");
1488 prox_sh_add_system("generator_count", generator_count);
1490 targ->generator_id = *generator_count;
1491 (*generator_count)++;
/* Main generator init: size the frame buffers from the reachable port's
 * MTU, create the mempool, configure rate limiting (token bucket and the
 * bytes-to-tsc table), load packet templates (inline or pcap), apply the
 * port source MAC and the configured random fields. Panics on any
 * invalid configuration. */
static void init_task_gen(struct task_base *tbase, struct task_args *targ)
	struct task_gen *task = (struct task_gen *)tbase;
	task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);

	task->packet_id_pos = targ->packet_id_pos;

	struct prox_port_cfg *port = find_reachable_port(targ);
	// TODO: check that all reachable ports have the same mtu...
	task->cksum_offload = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
	/* Frame size derived from the port MTU plus L2 header and two VLAN tags. */
	task->max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + 2 * PROX_VLAN_TAG_SIZE;
	// Not generating to any port...
	task->max_frame_size = PROX_RTE_ETHER_MAX_LEN;
	task->local_mbuf.mempool = task_gen_create_mempool(targ, task->max_frame_size);
	PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
	task->hz = rte_get_tsc_hz();
	task->lat_pos = targ->lat_pos;
	task->accur_pos = targ->accur_pos;
	task->sig_pos = targ->sig_pos;
	task->sig = targ->sig;
	task->new_rate_bps = targ->rate_bps;

	/*
	 * For tokens, use 10 Gbps as base rate
	 * Scripts can then use speed command, with speed=100 as 10 Gbps and speed=400 as 40 Gbps
	 * Script can query prox "port info" command to find out the port link speed to know
	 * at which rate to start. Note that virtio running on OVS returns 10 Gbps, so a script has
	 * probably also to check the driver (as returned by the same "port info" command.
	 */
	struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
	token_time_init(&task->token_time, &tt_cfg);

	init_task_gen_seeds(task);

	/* Clamp the configured bulk sizes to the supported [1, 64] range. */
	task->min_bulk_size = targ->min_bulk_size;
	task->max_bulk_size = targ->max_bulk_size;
	if (task->min_bulk_size < 1)
		task->min_bulk_size = 1;
	if (task->max_bulk_size < 1)
		task->max_bulk_size = 64;
	PROX_PANIC(task->max_bulk_size > 64, "max_bulk_size higher than 64\n");
	PROX_PANIC(task->max_bulk_size < task->min_bulk_size, "max_bulk_size must be > than min_bulk_size\n");

	task->pkt_count = -1;
	task->lat_enabled = targ->lat_enabled;
	task->runtime_flags = targ->runtime_flags;
	PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat not enabled by lat pos or accur pos configured\n");

	task->generator_id = targ->generator_id;
	plog_info("\tGenerator id = %d\n", task->generator_id);

	// Allocate array holding bytes to tsc for supported frame sizes
	task->bytes_to_tsc = prox_zmalloc(task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]), task->socket_id);
	PROX_PANIC(task->bytes_to_tsc == NULL,
		"Failed to allocate %u bytes (in huge pages) for bytes_to_tsc\n", task->max_frame_size);

	// task->port->max_link_speed reports the maximum, non negotiated ink speed in Mbps e.g. 40k for a 40 Gbps NIC.
	// It can be UINT32_MAX (virtual devices or not supported by DPDK < 16.04)
	uint64_t bytes_per_hz = UINT64_MAX;
	if ((task->port) && (task->port->max_link_speed != UINT32_MAX)) {
		bytes_per_hz = task->port->max_link_speed * 125000L;
		plog_info("\tPort %u: max link speed is %ld Mbps\n",
			(uint8_t)(task->port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
	// There are cases where hz estimate might be slighly over-estimated
	// This results in too much extrapolation
	// Only account for 99% of extrapolation to handle cases with up to 1% error clocks
	for (unsigned int i = 0; i < task->max_frame_size * MAX_PKT_BURST ; i++) {
		if (bytes_per_hz == UINT64_MAX)
			task->bytes_to_tsc[i] = 0;
		task->bytes_to_tsc[i] = (task->hz * i * 0.99) / bytes_per_hz;

	task->imix_nb_pkts = targ->imix_nb_pkts;
	for (uint32_t i = 0; i < targ->imix_nb_pkts; i++) {
		task->imix_pkt_sizes[i] = targ->imix_pkt_sizes[i];
	/* Templates come either from an inline packet or from a pcap file. */
	if (!strcmp(targ->pcap_file, "")) {
		plog_info("\tUsing inline definition of a packet\n");
		task_init_gen_load_pkt_inline(task, targ);
	plog_info("Loading from pcap %s\n", targ->pcap_file);
		task_init_gen_load_pcap(task, targ);

	PROX_PANIC(((targ->nb_txrings == 0) && (targ->nb_txports == 0)), "Gen mode requires a tx ring or a tx port");
	if ((targ->flags & DSF_KEEP_SRC_MAC) == 0) {
		/* Default: stamp the TX port's MAC into every template's source-MAC field. */
		task->flags |= TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC;
		memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(prox_rte_ether_addr));
		for (uint32_t i = 0; i < task->n_pkts; ++i) {
			rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
	for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
		PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
			   "Failed to add random\n");
/* Registration descriptor for the base "gen" mode.
 * NOTE(review): the two .flag_features initializers are alternatives
 * selected by a preprocessor conditional (per the SOFT_CRC comment); as
 * plain C the later designated initializer would silently win — confirm
 * the surrounding #if/#else when editing. */
static struct task_init task_init_gen = {
	.init = init_task_gen,
	.handle = handle_gen_bulk,
	.early_init = init_task_gen_early,
	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen)
/* Registration descriptor for the "gen l3" sub-mode (same init/handle
 * as plain gen; l3 behavior is keyed off the sub-mode string).
 * NOTE(review): as above, the duplicate .flag_features lines are
 * preprocessor alternatives — confirm the surrounding #if/#else. */
static struct task_init task_init_gen_l3 = {
	.sub_mode_str = "l3",
	.init = init_task_gen,
	.handle = handle_gen_bulk,
	.early_init = init_task_gen_early,
	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen)
/* This mode uses time stamps in the pcap file */
/* Registration descriptor for the "gen pcap" sub-mode: replays a pcap
 * file, pacing packets with the capture's inter-packet times. */
static struct task_init task_init_gen_pcap = {
	.sub_mode_str = "pcap",
	.init = init_task_gen_pcap,
	.handle = handle_gen_pcap_bulk,
	.start = start_pcap,
	.early_init = init_task_gen_early,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen_pcap)
1648 __attribute__((constructor)) static void reg_task_gen(void)
1650 reg_task(&task_init_gen);
1651 reg_task(&task_init_gen_l3);
1652 reg_task(&task_init_gen_pcap);