2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
20 #include <rte_cycles.h>
21 #include <rte_version.h>
22 #include <rte_byteorder.h>
23 #include <rte_ether.h>
24 #include <rte_hash_crc.h>
25 #include <rte_malloc.h>
27 #include "prox_shared.h"
29 #include "prox_malloc.h"
30 #include "handle_gen.h"
31 #include "handle_lat.h"
32 #include "task_init.h"
33 #include "task_base.h"
34 #include "prox_port_cfg.h"
39 #include "mbuf_utils.h"
41 #include "prox_cksum.h"
43 #include "prox_assert.h"
45 #include "token_time.h"
46 #include "local_mbuf.h"
49 #include "handle_master.h"
51 #include "prox_ipv6.h"
60 #define IP4(x) (x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, (x) >> 24
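/* Illustrative use of IP4() (hypothetical snippet, assuming the address is stored in
   network byte order on a little-endian host):
       uint32_t addr = 0x0100000a;                     // 10.0.0.1
       plog_info("ip = %u.%u.%u.%u\n", IP4(addr));     // prints "ip = 10.0.0.1"
   The macro expands to four comma-separated byte values, matching a "%u.%u.%u.%u"
   format string. */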
63 #define DO_NOT_PANIC 0
66 #define NOT_FROM_PCAP 0
68 #define TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC 1
70 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
72 const uint32_t pkt_size = pkt_template->len;
74 rte_pktmbuf_pkt_len(mbuf) = pkt_size;
75 rte_pktmbuf_data_len(mbuf) = pkt_size;
77 rte_memcpy(pkt, pkt_template->buf, pkt_template->len);
80 struct task_gen_pcap {
81 struct task_base base;
83 struct local_mbuf local_mbuf;
85 struct pkt_template *proto;
94 struct task_base base;
96 struct token_time token_time;
97 struct local_mbuf local_mbuf;
98 struct pkt_template *pkt_template; /* packet templates used at runtime */
99 uint64_t write_duration_estimate; /* how long it took previously to write the time stamps in the packets */
100 uint64_t earliest_tsc_next_pkt;
101 uint64_t new_rate_bps;
102 uint64_t pkt_queue_index;
103 uint32_t n_pkts; /* number of packet templates in use (after IMIX expansion) */
104 uint32_t orig_n_pkts; /* number of original packet templates (inline or from pcap) */
105 uint32_t pkt_idx; /* index of the current packet template */
106 uint32_t pkt_count; /* how many packets to generate */
107 uint32_t max_frame_size;
108 uint32_t runtime_flags;
110 uint16_t packet_id_pos;
115 uint8_t generator_id;
116 uint8_t n_rands; /* number of randoms */
117 uint8_t min_bulk_size;
118 uint8_t max_bulk_size;
120 uint8_t runtime_checksum_needed;
123 uint32_t rand_mask; /* since the random vals are uniform, masks don't introduce bias */
124 uint32_t fixed_bits; /* bits kept constant in the randomized field */
125 uint16_t rand_offset; /* offset in the packet at which the random is written */
126 uint8_t rand_len; /* # bytes to take from the random (max len = 4) */
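/* Illustration (hypothetical values): to randomize a 16-bit field at packet offset 34
   while keeping its top bit set, one could use rand_len = 2, rand_offset = 34,
   rand_mask = 0x00007fff and fixed_bits = 0x00008000. Each generated 32-bit random is
   AND-ed with rand_mask, OR-ed with fixed_bits, and its rand_len low-order bytes are
   written into the packet in network order (see task_gen_apply_random_fields()). */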
128 uint64_t accur[ACCURACY_WINDOW];
129 uint64_t pkt_tsc_offset[64];
130 struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
131 prox_rte_ether_addr src_mac;
133 uint8_t cksum_offload;
134 struct prox_port_cfg *port;
135 uint64_t *bytes_to_tsc;
136 uint32_t imix_pkt_sizes[MAX_IMIX_PKTS];
137 uint32_t imix_nb_pkts;
138 uint32_t new_imix_nb_pkts;
139 } __rte_cache_aligned;
141 static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes);
142 static void task_gen_reset_pkt_templates_content(struct task_gen *task);
143 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task);
144 static int check_all_pkt_size(struct task_gen *task, int do_panic);
145 static int check_all_fields_in_bounds(struct task_gen *task, int do_panic);
147 static inline uint8_t ipv4_get_hdr_len(prox_rte_ipv4_hdr *ip)
149 /* Optimize for common case of IPv4 header without options. */
150 if (ip->version_ihl == 0x45)
151 return sizeof(prox_rte_ipv4_hdr);
152 if (unlikely(ip->version_ihl >> 4 != 4)) {
153 plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
156 return (ip->version_ihl & 0xF) * 4;
159 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
161 *l2_len = sizeof(prox_rte_ether_hdr);
163 prox_rte_vlan_hdr *vlan_hdr;
164 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
165 prox_rte_ipv4_hdr *ip;
166 uint16_t ether_type = eth_hdr->ether_type;
169 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(prox_rte_vlan_hdr) < len)) {
170 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + *l2_len);
172 ether_type = vlan_hdr->eth_proto;
175 // No L3 cksum offload for IPv6, but TODO L4 offload
176 // ETYPE_EoGRE CRC not implemented yet
178 switch (ether_type) {
192 plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
197 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len);
198 if (ip->version_ihl >> 4 == 4)
199 *l3_len = ipv4_get_hdr_len(ip);
203 static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_template *pkt_template, int cksum_offload)
205 uint16_t l2_len = pkt_template->l2_len;
206 uint16_t l3_len = pkt_template->l3_len;
208 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len);
210 prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
211 } else if (ip->version_ihl >> 4 == 6) {
212 prox_rte_ipv6_hdr *ip6 = (prox_rte_ipv6_hdr *)(hdr + l2_len);
213 if (ip6->proto == IPPROTO_UDP) {
214 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
215 udp->dgram_cksum = 0;
216 udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
217 } else if (ip6->proto == IPPROTO_TCP) {
218 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
220 tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
225 static void task_gen_reset_token_time(struct task_gen *task)
227 token_time_set_bpp(&task->token_time, task->new_rate_bps);
228 token_time_reset(&task->token_time, rte_rdtsc(), 0);
231 static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk)
233 if (task->pkt_count == (uint32_t)-1)
236 if (task->pkt_count >= send_bulk)
237 task->pkt_count -= send_bulk;
243 static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf, uint16_t n_pkts)
245 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
246 uint64_t now = rte_rdtsc();
247 uint64_t send_bulk = 0;
248 uint32_t pkt_idx_tmp = task->pkt_idx;
250 if (pkt_idx_tmp == task->n_pkts) {
251 PROX_ASSERT(task->loop);
255 for (uint16_t j = 0; j < 64; ++j) {
256 uint64_t tsc = task->proto_tsc[pkt_idx_tmp];
257 if (task->last_tsc + tsc <= now) {
258 task->last_tsc += tsc;
261 if (pkt_idx_tmp == task->n_pkts) {
272 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
273 if (new_pkts == NULL)
276 for (uint16_t j = 0; j < send_bulk; ++j) {
277 struct rte_mbuf *next_pkt = new_pkts[j];
278 struct pkt_template *pkt_template = &task->proto[task->pkt_idx];
279 uint8_t *hdr = rte_pktmbuf_mtod(next_pkt, uint8_t *);
281 pkt_template_init_mbuf(pkt_template, next_pkt, hdr);
284 if (task->pkt_idx == task->n_pkts) {
292 return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
295 static inline uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
297 return task->bytes_to_tsc[bytes];
300 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
302 return pkt_idx + 1 >= task->n_pkts? 0 : pkt_idx + 1;
305 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
307 return (task->pkt_idx + offset) % task->n_pkts;
310 static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *total_bytes)
312 /* The biggest bulk we allow to send is task->max_bulk_size
313 packets. The max bulk size can also be limited by the
314 pkt_count field. At the same time, we are rate limiting
315 based on the specified speed (in bytes per second) so token
316 bucket based rate limiting must also be applied. The
317 minimum bulk size is also constrained. If the calculated
318 bulk size is less than the minimum, then don't send anything. */
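/* Worked example (hypothetical numbers): with min_bulk_size = 4, max_bulk_size = 64,
   a wire size of 1538 bytes per packet and token_time.bytes_now = 5000, only 3
   packets fit in the available tokens (3 * 1538 = 4614 <= 5000 < 4 * 1538). Since
   3 < min_bulk, nothing is sent this round and tokens keep accumulating until at
   least min_bulk packets can be sent back to back. */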
321 const uint32_t min_bulk = task->min_bulk_size;
322 uint32_t max_bulk = task->max_bulk_size;
324 if (task->pkt_count != (uint32_t)-1 && task->pkt_count < max_bulk) {
325 max_bulk = task->pkt_count;
328 uint32_t send_bulk = 0;
329 uint32_t pkt_idx_tmp = task->pkt_idx;
330 uint32_t would_send_bytes = 0;
334 * TODO - this must be improved to take into account the fact that, after applying randoms,
335 * the packet can be replaced by an ARP
337 for (uint16_t j = 0; j < max_bulk; ++j) {
338 struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp];
339 pkt_size = pktpl->len;
340 uint32_t pkt_len = pkt_len_to_wire_size(pkt_size);
341 if (pkt_len + would_send_bytes > task->token_time.bytes_now)
344 pkt_idx_tmp = task_gen_next_pkt_idx(task, pkt_idx_tmp);
347 would_send_bytes += pkt_len;
350 if (send_bulk < min_bulk)
352 *total_bytes = would_send_bytes;
356 static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr)
358 uint32_t ret, ret_tmp;
360 for (uint16_t i = 0; i < task->n_rands; ++i) {
361 ret = random_next(&task->rand[i].state);
362 ret_tmp = (ret & task->rand[i].rand_mask) | task->rand[i].fixed_bits;
364 ret_tmp = rte_bswap32(ret_tmp);
365 /* At this point, the lower order bytes (BE) contain
366 the generated value. The address where the value
367 of interest starts is at ret_tmp + 4 - rand_len. */
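/* Illustration (hypothetical value, little-endian host): with rand_len = 2 and
   ret_tmp = 0x00001234, rte_bswap32() leaves the bytes 00 00 12 34 in memory;
   copying rand_len bytes starting at pret_tmp + 4 - rand_len writes 12 34 into
   the packet, i.e. the low-order bytes of the value in network byte order. */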
368 uint8_t *pret_tmp = (uint8_t*)&ret_tmp;
369 rte_memcpy(hdr + task->rand[i].rand_offset, pret_tmp + 4 - task->rand[i].rand_len, task->rand[i].rand_len);
373 static void task_gen_apply_all_random_fields(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
378 for (uint16_t i = 0; i < count; ++i)
379 task_gen_apply_random_fields(task, pkt_hdr[i]);
382 static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
384 *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
387 static void task_gen_apply_sig(struct task_gen *task, struct pkt_template *dst)
390 *(uint32_t *)(dst->buf + task->sig_pos) = task->sig;
393 static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
395 if (!task->accur_pos)
398 /* The accuracy of task->pkt_queue_index - ACCURACY_WINDOW is stored in
399 packet task->pkt_queue_index. The ID modulo ACCURACY_WINDOW is the same for both packets. */
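/* Illustration: if ACCURACY_WINDOW were 64, the packet with id pkt_queue_index = 70
   would carry, at accur_pos, the accuracy measured for the packet with id
   70 - 64 = 6; both ids map to the same slot (70 & 63 == 6 & 63 == 6) in task->accur[]. */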
401 for (uint16_t j = 0; j < count; ++j) {
402 uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & (ACCURACY_WINDOW - 1)];
403 task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
407 static void task_gen_apply_unique_id(struct task_gen *task, uint8_t *pkt_hdr, const struct unique_id *id)
409 struct unique_id *dst = (struct unique_id *)(pkt_hdr + task->packet_id_pos);
414 static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
416 if (!task->packet_id_pos)
419 for (uint16_t i = 0; i < count; ++i) {
421 unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
422 task_gen_apply_unique_id(task, pkt_hdr[i], &id);
426 static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
428 if (!(task->runtime_flags & TASK_TX_CRC))
431 if (!task->runtime_checksum_needed)
434 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
435 for (uint16_t i = 0; i < count; ++i) {
436 struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
437 checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
438 pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
442 static void task_gen_consume_tokens(struct task_gen *task, uint32_t tokens, uint32_t send_count)
444 /* If max burst has been sent, we can't keep up so just assume
445 that we can (leaving a "gap" in the packet stream on the wire). */
447 task->token_time.bytes_now -= tokens;
448 if (send_count == task->max_bulk_size && task->token_time.bytes_now > tokens) {
449 task->token_time.bytes_now = tokens;
453 static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t count)
455 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
456 struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
457 uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
458 #ifdef NO_EXTRAPOLATION
459 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1];
461 uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
462 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
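/* Extrapolation sketch: pkt_tsc_offset[count - 1] ends at the start of the last
   packet, so the wire time of that last packet is added to cover the full bulk.
   E.g. (hypothetical) with pkt_tsc_offset[count - 1] = 9000 cycles and a last packet
   worth 1500 cycles on the wire, the bulk occupies roughly 10500 cycles. */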
465 return bulk_duration;
468 static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
470 if (!task->lat_enabled)
473 uint64_t tx_tsc, delta_t;
474 uint64_t tsc_before_tx = 0;
476 /* Just before sending the packets, apply the time stamp
477 relative to when the first packet will be sent. The first
478 packet will be sent now. The time is read for each packet
479 to reduce the error towards the actual time the packet will be sent. */
481 uint64_t write_tsc_after, write_tsc_before;
483 write_tsc_before = rte_rdtsc();
485 /* The time it took previously to write the time stamps in the
486 packets is used as an estimate for how long it will take to
487 write the time stamps now. The estimated time at which the
488 packets will actually be sent will be at tx_tsc. */
489 tx_tsc = write_tsc_before + task->write_duration_estimate;
491 /* The offset delta_t tracks the difference between the actual
492 time and the time written in the packets. Adding the offset
493 to the actual time ensures that the time written in the
494 packets is monotonically increasing. At the same time,
495 simply sleeping until delta_t is zero would leave a period
496 of silence on the line. The error has been introduced
497 earlier, but the packets have already been sent. */
499 /* This happens typically if previous bulk was delayed
500 by an interrupt e.g. (with Time in nsec)
501 Time x: sleep 4 microsec
502 Time x+4000: send 64 packets (64 packets take 4000 nsec at 10 Gbps with 64-byte frames)
503 Time x+5000: send 16 packets (16 packets take 1000 nsec)
504 When we send the 16 packets, the 64 earlier packets are not yet fully sent. */
506 if (tx_tsc < task->earliest_tsc_next_pkt)
507 delta_t = task->earliest_tsc_next_pkt - tx_tsc;
511 for (uint16_t i = 0; i < count; ++i) {
512 uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
513 const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
514 *pos = pkt_tsc >> LATENCY_ACCURACY;
517 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
518 task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
519 write_tsc_after = rte_rdtsc();
520 task->write_duration_estimate = write_tsc_after - write_tsc_before;
522 /* Make sure that the time stamps that were written
523 are valid. The offset must be taken into account */
525 tsc_before_tx = rte_rdtsc();
526 } while (tsc_before_tx < tx_tsc);
528 return tsc_before_tx;
531 static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint64_t tsc_before_tx)
533 if (!task->accur_pos)
536 uint64_t accur = rte_rdtsc() - tsc_before_tx;
537 uint64_t first_accuracy_idx = task->pkt_queue_index - count;
539 for (uint32_t i = 0; i < count; ++i) {
540 uint32_t accuracy_idx = (first_accuracy_idx + i) & (ACCURACY_WINDOW - 1);
542 task->accur[accuracy_idx] = accur;
546 static void task_gen_load_and_prefetch(struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
548 for (uint16_t i = 0; i < count; ++i)
549 rte_prefetch0(mbufs[i]);
550 for (uint16_t i = 0; i < count; ++i)
551 pkt_hdr[i] = rte_pktmbuf_mtod(mbufs[i], uint8_t *);
552 for (uint16_t i = 0; i < count; ++i)
553 rte_prefetch0(pkt_hdr[i]);
556 static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
558 uint64_t will_send_bytes = 0;
560 for (uint16_t i = 0; i < count; ++i) {
561 struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
562 struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
563 pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
564 prox_rte_ether_hdr *hdr = (prox_rte_ether_hdr *)pkt_hdr[i];
565 if (task->lat_enabled) {
566 #ifdef NO_EXTRAPOLATION
567 task->pkt_tsc_offset[i] = 0;
569 task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
571 will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
573 task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
577 static int task_gen_allocate_templates(struct task_gen *task, uint32_t orig_nb_pkts, uint32_t nb_pkts, int do_panic, int pcap)
579 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
580 size_t orig_mem_size = orig_nb_pkts * sizeof(*task->pkt_template);
581 task->pkt_template = prox_zmalloc(mem_size, task->socket_id);
582 task->pkt_template_orig = prox_zmalloc(orig_mem_size, task->socket_id);
584 if (task->pkt_template == NULL || task->pkt_template_orig == NULL) {
585 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for %s\n", mem_size, pcap ? "pcap file":"packet template");
589 for (size_t i = 0; i < orig_nb_pkts; i++) {
590 task->pkt_template_orig[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
591 if (task->pkt_template_orig[i].buf == NULL) {
592 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
596 for (size_t i = 0; i < nb_pkts; i++) {
597 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
598 if (task->pkt_template[i].buf == NULL) {
599 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
606 static int task_gen_reallocate_templates(struct task_gen *task, uint32_t nb_pkts, int do_panic)
608 // Need to free up bufs allocated in previous (longer) imix
609 for (size_t i = nb_pkts; i < task->n_pkts; i++) {
610 if (task->pkt_template[i].buf) {
611 rte_free(task->pkt_template[i].buf);
612 task->pkt_template[i].buf = NULL;
616 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
617 size_t old_mem_size = task->n_pkts * sizeof(*task->pkt_template);
618 if (old_mem_size > mem_size)
619 old_mem_size = mem_size;
621 struct pkt_template *ptr;
623 // re-allocate memory for new pkt_template (this might allocate additional memory or free up some...)
624 if ((ptr = rte_malloc_socket(NULL, mem_size, RTE_CACHE_LINE_SIZE, task->socket_id)) != NULL) {
625 memcpy(ptr, task->pkt_template, old_mem_size);
626 rte_free(task->pkt_template);
627 task->pkt_template = ptr;
629 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for packet template for IMIX\n", mem_size);
633 // Need to allocate bufs for new template but no need to reallocate for existing ones
634 for (size_t i = task->n_pkts; i < nb_pkts; ++i) {
635 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
636 if (task->pkt_template[i].buf == NULL) {
637 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for packet %zd in IMIX\n", task->max_frame_size, i);
644 static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
646 const uint16_t min_len = sizeof(prox_rte_ether_hdr) + sizeof(prox_rte_ipv4_hdr);
647 const uint16_t max_len = task->max_frame_size;
650 PROX_PANIC(pkt_size == 0, "Invalid packet size (no packet defined?)\n");
651 PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
652 PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
656 plog_err("Invalid packet size (no packet defined?)\n");
659 if (pkt_size > max_len) {
660 if (pkt_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE - 4)
661 plog_err("pkt_size too high and jumbo frames disabled\n");
663 plog_err("pkt_size out of range (must be <= (mtu=%u))\n", max_len);
666 if (pkt_size < min_len) {
667 plog_err("pkt_size out of range (must be >= %u)\n", min_len);
674 static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
676 if (task->lat_enabled) {
677 uint32_t pos_beg = task->lat_pos;
678 uint32_t pos_end = task->lat_pos + 3U;
681 PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
682 pos_beg, pos_end, pkt_size);
683 else if (pkt_size <= pos_end) {
684 plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
688 if (task->packet_id_pos) {
689 uint32_t pos_beg = task->packet_id_pos;
690 uint32_t pos_end = task->packet_id_pos + 4U;
693 PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
694 pos_beg, pos_end, pkt_size);
695 else if (pkt_size <= pos_end) {
696 plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
700 if (task->accur_pos) {
701 uint32_t pos_beg = task->accur_pos;
702 uint32_t pos_end = task->accur_pos + 3U;
705 PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u-%u, but packet size is %u bytes\n",
706 pos_beg, pos_end, pkt_size);
707 else if (pkt_size <= pos_end) {
708 plog_err("Writing accuracy at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
715 static int task_gen_set_eth_ip_udp_sizes(struct task_gen *task, uint32_t n_orig_pkts, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
719 prox_rte_ipv4_hdr *ip;
720 struct pkt_template *template;
722 for (size_t j = 0; j < nb_pkt_sizes; ++j) {
723 for (size_t i = 0; i < n_orig_pkts; ++i) {
724 k = j * n_orig_pkts + i;
725 template = &task->pkt_template[k];
726 if (template->l2_len == 0)
728 ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
729 ip->total_length = rte_bswap16(pkt_sizes[j] - template->l2_len);
730 l4_len = pkt_sizes[j] - template->l2_len - template->l3_len;
731 ip->hdr_checksum = 0;
732 prox_ip_cksum_sw(ip);
734 if (ip->next_proto_id == IPPROTO_UDP) {
735 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
736 udp->dgram_len = rte_bswap16(l4_len);
737 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
738 } else if (ip->next_proto_id == IPPROTO_TCP) {
739 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
740 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
747 static int task_gen_apply_imix(struct task_gen *task, int do_panic)
749 struct pkt_template *ptr;
751 task->imix_nb_pkts = task->new_imix_nb_pkts;
752 uint32_t n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
754 if ((n_pkts != task->n_pkts) && ((rc = task_gen_reallocate_templates(task, n_pkts, do_panic)) < 0))
757 task->n_pkts = n_pkts;
758 if (task->pkt_idx >= n_pkts)
760 task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
761 task_gen_reset_pkt_templates_content(task);
762 task_gen_pkt_template_recalc_metadata(task);
763 check_all_pkt_size(task, DO_NOT_PANIC);
764 check_all_fields_in_bounds(task, DO_NOT_PANIC);
765 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
769 static void task_gen_update_config(struct task_gen *task)
771 if (task->token_time.cfg.bpp != task->new_rate_bps)
772 task_gen_reset_token_time(task);
773 if (task->new_imix_nb_pkts)
774 task_gen_apply_imix(task, DO_NOT_PANIC);
775 task->new_imix_nb_pkts = 0;
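/* Sketch of the recursion below (based on the visible calls): build_value() walks the
   random mask one bit at a time; for every mask bit it forks into two branches, one
   with the bit cleared and one with the bit set, and once the mask is exhausted it
   registers val | fixed_bits with the control plane. E.g. (hypothetical) a mask with
   2 random bits leads to 4 registered IPv4 source addresses. */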
778 static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
780 struct task_base *tbase = (struct task_base *)task;
782 build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits);
784 build_value(task, mask >> 1, bit_pos + 1, val | (1 << bit_pos), fixed_bits);
787 register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
791 static inline void build_value_ipv6(struct task_gen *task, uint32_t mask, int var_bit_pos, int init_var_bit_pos, struct ipv6_addr val, struct ipv6_addr fixed_bits)
793 struct task_base *tbase = (struct task_base *)task;
794 if (var_bit_pos < 32) {
795 build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
797 int byte_pos = (var_bit_pos + init_var_bit_pos) / 8;
798 int bit_pos = (var_bit_pos + init_var_bit_pos) % 8;
799 val.bytes[byte_pos] = val.bytes[byte_pos] | (1 << bit_pos);
800 build_value_ipv6(task, mask >> 1, var_bit_pos + 1, init_var_bit_pos, val, fixed_bits);
803 for (uint i = 0; i < sizeof(struct ipv6_addr) / 8; i++)
804 val.bytes[i] = val.bytes[i] | fixed_bits.bytes[i];
805 register_node_to_ctrl_plane(tbase->l3.tmaster, &null_addr, &val, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
809 static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
811 struct task_base *tbase = (struct task_base *)task;
814 uint32_t mask, ip_len;
815 struct ipv6_addr *ip6_src = NULL;
818 for (uint32_t i = 0; i < task->n_pkts; ++i) {
819 struct pkt_template *pktpl = &task->pkt_template[i];
820 unsigned int ip_src_pos = 0;
822 unsigned int l2_len = sizeof(prox_rte_ether_hdr);
824 uint8_t *pkt = pktpl->buf;
825 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
826 uint16_t ether_type = eth_hdr->ether_type;
827 prox_rte_vlan_hdr *vlan_hdr;
828 prox_rte_ipv4_hdr *ip;
831 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) {
832 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + l2_len);
834 ether_type = vlan_hdr->eth_proto;
836 if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
838 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
839 if (ip->version_ihl >> 4 == 4)
841 else if (ip->version_ihl >> 4 != 6) // Version field at same location for IPv4 and IPv6
843 } else if (ether_type == ETYPE_IPv4) {
844 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
845 PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4); // Invalid Packet
847 } else if (ether_type == ETYPE_IPv6) {
848 ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
849 PROX_PANIC(ip->version_ihl >> 4 != 6, "IPv6 ether_type but IP version = %d != 6", ip->version_ihl >> 4); // Invalid Packet
854 PROX_PANIC(ipv4 && ((prox_cfg.flags & DSF_L3_ENABLED) == 0), "Trying to generate an IPv4 packet in NDP mode => not supported\n");
855 PROX_PANIC((ipv4 == 0) && ((prox_cfg.flags & DSF_NDP_ENABLED) == 0), "Trying to generate an IPv6 packet in L3 (IPv4) mode => not supported\n");
857 // Even if IPv4 header contains options, options are after ip src and dst
858 ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t);
859 ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
860 plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
861 register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
862 ip_len = sizeof(uint32_t);
864 ip_src_pos = l2_len + sizeof(prox_rte_ipv6_hdr) - 2 * sizeof(struct ipv6_addr);
865 ip6_src = ((struct ipv6_addr *)(pktpl->buf + ip_src_pos));
866 plog_info("\tip_src_pos = %d, ip6_src = "IPv6_BYTES_FMT"\n", ip_src_pos, IPv6_BYTES(ip6_src->bytes));
867 register_node_to_ctrl_plane(tbase->l3.tmaster, ip6_src, &null_addr, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
868 ip_len = sizeof(struct ipv6_addr);
871 for (int j = 0; j < task->n_rands; j++) {
872 offset = task->rand[j].rand_offset;
873 len = task->rand[j].rand_len;
874 mask = task->rand[j].rand_mask;
875 fixed = task->rand[j].fixed_bits;
876 plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
877 if (offset >= ip_src_pos + ip_len) // First random bit after IP
879 if (offset + len < ip_src_pos) // Last random bit before IP
883 if (offset >= ip_src_pos) {
884 int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1;
885 mask = mask & ip_src_mask;
886 fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask);
887 build_value(task, mask, 0, 0, fixed);
889 int32_t bits = ((ip_src_pos + 4 - offset - len) * 8);
891 fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
892 build_value(task, mask, 0, 0, fixed);
895 // We do not support a random that only partially covers the IP address - either starting before or ending after it
896 if (offset + len >= ip_src_pos + ip_len) { // len over the ip
897 plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
900 if (offset < ip_src_pos) {
901 plog_err("Not supported: random_offset = %d, random_len = %d, ip_src_pos = %d, ip_len = %d\n", offset, len, ip_src_pos, ip_len);
904 // Even for IPv6, the random masks supported by PROX are only 32 bits wide
905 struct ipv6_addr fixed_ipv6;
906 uint init_var_byte_pos = (offset - ip_src_pos);
907 for (uint i = 0; i < sizeof(struct ipv6_addr); i++) {
908 if (i < init_var_byte_pos)
909 fixed_ipv6.bytes[i] = ip6_src->bytes[i];
910 else if (i < init_var_byte_pos + len)
911 fixed_ipv6.bytes[i] = (fixed >> (i - init_var_byte_pos)) & 0xFF;
913 fixed_ipv6.bytes[i] = ip6_src->bytes[i];
915 build_value_ipv6(task, mask, 0, init_var_byte_pos * 8, null_addr, fixed_ipv6);
921 static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
923 struct task_gen *task = (struct task_gen *)tbase;
924 uint8_t out[MAX_PKT_BURST] = {0};
929 task_gen_update_config(task);
931 if (task->pkt_count == 0) {
932 task_gen_reset_token_time(task);
935 if (!task->token_time.cfg.bpp)
938 token_time_update(&task->token_time, rte_rdtsc());
940 uint32_t would_send_bytes;
941 uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
945 task_gen_take_count(task, send_bulk);
946 task_gen_consume_tokens(task, would_send_bytes, send_bulk);
948 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
949 if (new_pkts == NULL)
951 uint8_t *pkt_hdr[MAX_RING_BURST];
953 task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
954 task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
955 task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
956 task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
957 task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
959 uint64_t tsc_before_tx;
961 tsc_before_tx = task_gen_write_latency(task, pkt_hdr, send_bulk);
962 task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
963 ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
964 task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
966 // If we failed to send some packets, we need to do some clean-up:
969 // We need to re-use the packet indexes that were not sent
970 // Hence non-sent packets will not be considered as lost by the receiver when it looks at
971 // packet ids. This should also increase the percentage of packets used for latency measurements
972 task->pkt_queue_index -= ret;
974 // In case of failures, the estimate of when we can send the next packet (earliest_tsc_next_pkt) is wrong
975 // This would result in under-estimated latencies (down to 0 or even negative)
976 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, ret);
977 task->earliest_tsc_next_pkt -= bulk_duration;
982 static void init_task_gen_seeds(struct task_gen *task)
984 for (size_t i = 0; i < sizeof(task->rand)/sizeof(task->rand[0]); ++i)
985 random_init_seed(&task->rand[i].state);
988 static uint32_t pcap_count_pkts(pcap_t *handle, uint32_t *max_frame_size)
990 struct pcap_pkthdr header;
994 long pkt1_fpos = ftell(pcap_file(handle));
996 while ((buf = pcap_next(handle, &header))) {
997 if (header.len > *max_frame_size)
998 *max_frame_size = header.len;
1001 int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
1002 PROX_PANIC(ret2 != 0, "Failed to reset reading pcap file\n");
1006 static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
1008 uint64_t tot_inter_pkt = 0;
1010 for (uint32_t i = 0; i < n; ++i)
1011 tot_inter_pkt += time_stamp[i];
1012 return (tot_inter_pkt + n / 2)/n;
1015 static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp, uint32_t max_frame_size)
1017 struct pcap_pkthdr header;
1021 for (uint32_t i = 0; i < n_pkts; ++i) {
1022 buf = pcap_next(handle, &header);
1024 PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
1025 proto[i].len = header.len;
1026 len = RTE_MIN(header.len, max_frame_size);
1027 if (header.len > len)
1028 plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
1031 static struct timeval beg;
1037 tv = tv_diff(&beg, &header.ts);
1038 tv_to_tsc(&tv, time_stamp + i);
1040 rte_memcpy(proto[i].buf, buf, len);
1043 if (time_stamp && n_pkts) {
1044 for (uint32_t i = n_pkts - 1; i > 0; --i)
1045 time_stamp[i] -= time_stamp[i - 1];
1046 /* Since the handle function will loop the packets,
1047 there is one time-stamp that is not provided by the
1048 pcap file. This is the time between the last and
1049 the first packet. This implementation takes the
1050 average of the inter-packet times here. */
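/* Illustration (hypothetical capture): for 4 packets captured at t = 0, 10, 30 and
   60 us, the loop above turns the absolute stamps into inter-packet gaps
   {_, 10, 20, 30} and time_stamp[0] becomes their average (20 us), used as the gap
   between the last and the first packet when the pcap is replayed in a loop. */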
1052 time_stamp[0] = avg_time_stamp(time_stamp + 1, n_pkts - 1);
1058 static int check_all_pkt_size(struct task_gen *task, int do_panic)
1061 for (uint32_t i = 0; i < task->n_pkts;++i) {
1062 if ((rc = check_pkt_size(task, task->pkt_template[i].len, do_panic)) != 0)
1068 static int check_all_fields_in_bounds(struct task_gen *task, int do_panic)
1071 for (uint32_t i = 0; i < task->n_pkts;++i) {
1072 if ((rc = check_fields_in_bounds(task, task->pkt_template[i].len, do_panic)) != 0)
1078 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
1080 struct pkt_template *template;
1082 for (size_t i = 0; i < task->n_pkts; ++i) {
1083 template = &task->pkt_template[i];
1084 parse_l2_l3_len(template->buf, &template->l2_len, &template->l3_len, template->len);
1088 static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
1090 struct pkt_template *template;
1091 prox_rte_ipv4_hdr *ip;
1093 task->runtime_checksum_needed = 0;
1094 for (size_t i = 0; i < task->n_pkts; ++i) {
1095 template = &task->pkt_template[i];
1096 if (template->l2_len == 0)
1098 ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
1099 if (ip->version_ihl >> 4 == 4) {
1100 ip->hdr_checksum = 0;
1101 prox_ip_cksum_sw(ip);
1102 uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
1103 if (ip->next_proto_id == IPPROTO_UDP) {
1104 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
1105 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
1106 } else if (ip->next_proto_id == IPPROTO_TCP) {
1107 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
1108 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
1110 } else if (ip->version_ihl >> 4 == 6) {
1111 prox_rte_ipv6_hdr *ip6;
1112 ip6 = (prox_rte_ipv6_hdr *)(template->buf + template->l2_len);
1113 if (ip6->proto == IPPROTO_UDP) {
1114 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(ip6 + 1);
1115 udp->dgram_cksum = 0;
1116 udp->dgram_cksum = rte_ipv6_udptcp_cksum(ip6, udp);
1117 } else if (ip6->proto == IPPROTO_TCP) {
1118 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(ip6 + 1);
1120 tcp->cksum = rte_ipv6_udptcp_cksum(ip6, tcp);
1124 /* The current implementation avoids checksum
1125 recalculation at run time by determining, at packet
1126 construction time, that no field written later
1127 requires the checksum to be recomputed. */
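/* Example: if lat_pos points into the UDP payload of an IPv4/UDP template, writing a
   fresh time stamp into every packet invalidates the checksum pre-computed in the
   template, so runtime_checksum_needed is set and task_gen_checksum_packets()
   re-checksums each packet just before transmit. */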
1128 if (task->lat_enabled && task->lat_pos > template->l2_len)
1129 task->runtime_checksum_needed = 1;
1130 if (task->accur_pos > template->l2_len)
1131 task->runtime_checksum_needed = 1;
1132 if (task->packet_id_pos > template->l2_len)
1133 task->runtime_checksum_needed = 1;
1137 static void task_gen_pkt_template_recalc_all(struct task_gen *task)
1139 task_gen_pkt_template_recalc_metadata(task);
1140 task_gen_pkt_template_recalc_checksum(task);
1143 static void task_gen_set_pkt_templates_len(struct task_gen *task, uint32_t *pkt_sizes)
1145 struct pkt_template *src, *dst;
1147 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1148 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1149 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1150 dst->len = pkt_sizes[j];
1155 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
1157 struct pkt_template *src, *dst;
1159 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1160 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1161 src = &task->pkt_template_orig[i];
1162 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1163 dst->len = src->len;
1168 static void task_gen_reset_pkt_templates_content(struct task_gen *task)
1170 struct pkt_template *src, *dst;
1172 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1173 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1174 src = &task->pkt_template_orig[i];
1175 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1176 memcpy(dst->buf, src->buf, RTE_MAX(src->len, dst->len));
1177 if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
1178 rte_memcpy(&dst->buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
1180 task_gen_apply_sig(task, dst);
1185 static void task_gen_reset_pkt_templates(struct task_gen *task)
1187 if (task->imix_nb_pkts)
1188 task_gen_set_pkt_templates_len(task, task->imix_pkt_sizes);
1190 task_gen_reset_pkt_templates_len(task);
1191 task_gen_reset_pkt_templates_content(task);
1192 task_gen_pkt_template_recalc_all(task);
1195 static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
1199 task->orig_n_pkts = 1;
1200 if (task->imix_nb_pkts == 0) {
1202 task->imix_pkt_sizes[0] = targ->pkt_size;
1204 task->n_pkts = task->imix_nb_pkts;
1206 task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, NOT_FROM_PCAP);
1208 rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, task->max_frame_size);
1209 task->pkt_template_orig[0].len = task->imix_pkt_sizes[0];
1210 task_gen_reset_pkt_templates(task);
1211 check_all_pkt_size(task, DO_PANIC);
1212 check_all_fields_in_bounds(task, DO_PANIC);
1214 // If IMIX was not specified then pkt_size is determined by the pkt_size parameter or the length of pkt_inline
1215 // In that case, for backward compatibility, we do NOT adapt the length of IP and UDP to the length of the packet
1216 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
1219 static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
1221 char err[PCAP_ERRBUF_SIZE];
1222 uint32_t max_frame_size;
1223 pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
1224 PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
1226 task->orig_n_pkts = pcap_count_pkts(handle, &max_frame_size);
1227 plogx_info("%u packets in pcap file '%s'; max frame size=%d\n", task->orig_n_pkts, targ->pcap_file, max_frame_size);
1228 PROX_PANIC(max_frame_size > task->max_frame_size,
1229 max_frame_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE -4 ?
1230 "pkt_size too high and jumbo frames disabled" : "pkt_size > mtu");
1233 task->orig_n_pkts = RTE_MIN(task->orig_n_pkts, targ->n_pkts);
1234 if (task->imix_nb_pkts == 0) {
1235 task->n_pkts = task->orig_n_pkts;
1237 task->n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
1239 task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, FROM_PCAP);
1240 plogx_info("Loading %u packets from pcap\n", task->n_pkts);
1242 pcap_read_pkts(handle, targ->pcap_file, task->orig_n_pkts, task->pkt_template_orig, NULL, max_frame_size);
1244 task_gen_reset_pkt_templates(task);
1245 check_all_pkt_size(task, DO_PANIC);
1246 check_all_fields_in_bounds(task, DO_PANIC);
1247 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
1250 static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint16_t max_frame_size)
1252 static char name[] = "gen_pool";
1253 struct rte_mempool *ret;
1254 const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
1257 uint32_t mbuf_size = TX_MBUF_SIZE;
1258 if (max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size)
1259 mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
1260 plog_info("\t\tCreating mempool with name '%s'\n", name);
1261 ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size,
1262 targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
1263 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
1265 PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
1266 sock_id, targ->nb_mbuf - 1);
1268 plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret,
1269 targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id);
1274 void task_gen_set_pkt_count(struct task_base *tbase, uint32_t count)
1276 struct task_gen *task = (struct task_gen *)tbase;
1278 task->pkt_count = count;
1281 int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
1283 struct task_gen *task = (struct task_gen *)tbase;
1286 for (size_t i = 0; i < task->n_pkts; ++i) {
1287 if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
1289 if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
1292 for (size_t i = 0; i < task->n_pkts; ++i) {
1293 task->pkt_template[i].len = pkt_size;
1298 int task_gen_set_imix(struct task_base *tbase, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
1300 struct task_gen *task = (struct task_gen *)tbase;
1303 memcpy(task->imix_pkt_sizes, pkt_sizes, nb_pkt_sizes * sizeof(uint32_t));
1304 for (size_t i = 0; i < nb_pkt_sizes; ++i) {
1305 if ((rc = check_pkt_size(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
1307 if ((rc = check_fields_in_bounds(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
1310 // only set new_imix_nb_pkts if checks of pkt sizes succeeded
1311 task->new_imix_nb_pkts = nb_pkt_sizes;
1315 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
1317 struct task_gen *task = (struct task_gen *)tbase;
1319 task->new_rate_bps = bps;
1322 void task_gen_reset_randoms(struct task_base *tbase)
1324 struct task_gen *task = (struct task_gen *)tbase;
1326 for (uint32_t i = 0; i < task->n_rands; ++i) {
1327 task->rand[i].rand_mask = 0;
1328 task->rand[i].fixed_bits = 0;
1329 task->rand[i].rand_offset = 0;
1334 int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
1336 struct task_gen *task = (struct task_gen *)tbase;
1338 if (offset + len > task->max_frame_size)
1340 for (size_t i = 0; i < task->n_pkts; ++i) {
1341 uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
1342 uint8_t *dst = task->pkt_template[i].buf;
1344 rte_memcpy(dst + offset, &to_write, len);
1347 task_gen_pkt_template_recalc_all(task);
1352 void task_gen_reset_values(struct task_base *tbase)
1354 struct task_gen *task = (struct task_gen *)tbase;
1356 task_gen_reset_pkt_templates_content(task);
1357 task_gen_pkt_template_recalc_metadata(task);
1358 check_all_pkt_size(task, DO_NOT_PANIC);
1359 check_all_fields_in_bounds(task, DO_NOT_PANIC);
1360 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
1362 if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
1363 for (uint32_t i = 0; i < task->n_pkts; ++i) {
1364 rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
1369 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
1371 struct task_gen *task = (struct task_gen *)tbase;
1373 return task->n_rands;
1376 static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
1378 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1379 task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
1380 uint32_t max_frame_size;
1382 task->loop = targ->loop;
1384 task->hz = rte_get_tsc_hz();
1386 char err[PCAP_ERRBUF_SIZE];
1387 pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
1388 PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
1390 task->n_pkts = pcap_count_pkts(handle, &max_frame_size);
1391 plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
1393 task->local_mbuf.mempool = task_gen_create_mempool(targ, max_frame_size);
1395 PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
1398 plogx_info("Configured to load %u packets\n", targ->n_pkts);
1399 if (task->n_pkts > targ->n_pkts)
1400 task->n_pkts = targ->n_pkts;
1402 plogx_info("Loading %u packets from pcap\n", task->n_pkts);
1404 size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
1405 uint8_t *mem = prox_zmalloc(mem_size, task->socket_id);
1407 PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
1408 task->proto = (struct pkt_template *) mem;
1409 task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
1411 for (uint i = 0; i < targ->n_pkts; i++) {
1412 task->proto[i].buf = prox_zmalloc(max_frame_size, task->socket_id);
1413 PROX_PANIC(task->proto[i].buf == NULL, "Failed to allocate %u bytes (in huge pages) for pcap file\n", max_frame_size);
1416 pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc, max_frame_size);
1420 static int task_gen_find_random_with_offset(struct task_gen *task, uint32_t offset)
1422 for (uint32_t i = 0; i < task->n_rands; ++i) {
1423 if (task->rand[i].rand_offset == offset) {
1431 int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t offset, uint32_t rand_id)
1433 struct task_gen *task = (struct task_gen *)tbase;
1434 uint32_t existing_rand;
1436 if (rand_id == UINT32_MAX && task->n_rands == 64) {
1437 plog_err("Too many randoms\n");
1440 uint32_t mask, fixed, len;
1442 if (parse_random_str(&mask, &fixed, &len, rand_str)) {
1443 plog_err("%s\n", get_parse_err());
1446 task->runtime_checksum_needed = 1;
1448 existing_rand = task_gen_find_random_with_offset(task, offset);
1449 if (existing_rand != UINT32_MAX) {
1450 plog_warn("Random at offset %d already set => overwriting len = %d %s\n", offset, len, rand_str);
1451 rand_id = existing_rand;
1452 task->rand[rand_id].rand_len = len;
1453 task->rand[rand_id].rand_offset = offset;
1454 task->rand[rand_id].rand_mask = mask;
1455 task->rand[rand_id].fixed_bits = fixed;
1459 task->rand[task->n_rands].rand_len = len;
1460 task->rand[task->n_rands].rand_offset = offset;
1461 task->rand[task->n_rands].rand_mask = mask;
1462 task->rand[task->n_rands].fixed_bits = fixed;
1468 static void start(struct task_base *tbase)
1470 struct task_gen *task = (struct task_gen *)tbase;
1471 task->pkt_queue_index = 0;
1473 task_gen_reset_token_time(task);
1474 if (tbase->l3.tmaster) {
1475 register_all_ip_to_ctrl_plane(task);
1479 Handle the case when two tasks transmit to the same port
1480 and one of them is stopped. In that case ARP (requests or replies)
1481 might not be sent. The master will have to keep a list of rings.
1482 stop will have to de-register the IP from the ctrl plane.
1483 Un-registration will remove the ring. When there is more than
1484 one active ring, the master can always use the first one.
1488 static void start_pcap(struct task_base *tbase)
1490 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1491 /* When we start, the first packet is sent immediately. */
1492 task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
1496 static void init_task_gen_early(struct task_args *targ)
1498 uint8_t *generator_count = prox_sh_find_system("generator_count");
1500 if (generator_count == NULL) {
1501 generator_count = prox_zmalloc(sizeof(*generator_count), rte_lcore_to_socket_id(targ->lconf->id));
1502 PROX_PANIC(generator_count == NULL, "Failed to allocate generator count\n");
1503 prox_sh_add_system("generator_count", generator_count);
1505 targ->generator_id = *generator_count;
1506 (*generator_count)++;
1509 static void init_task_gen(struct task_base *tbase, struct task_args *targ)
1511 struct task_gen *task = (struct task_gen *)tbase;
1512 task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
1514 task->packet_id_pos = targ->packet_id_pos;
1516 struct prox_port_cfg *port = find_reachable_port(targ);
1517 // TODO: check that all reachable ports have the same mtu...
1519 task->cksum_offload = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
1521 task->max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + 2 * PROX_VLAN_TAG_SIZE;
1523 // Not generating to any port...
1524 task->max_frame_size = PROX_RTE_ETHER_MAX_LEN;
1526 task->local_mbuf.mempool = task_gen_create_mempool(targ, task->max_frame_size);
1527 PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
1529 task->hz = rte_get_tsc_hz();
1530 task->lat_pos = targ->lat_pos;
1531 task->accur_pos = targ->accur_pos;
1532 task->sig_pos = targ->sig_pos;
1533 task->sig = targ->sig;
1534 task->new_rate_bps = targ->rate_bps;
1537 * For tokens, use 10 Gbps as base rate
1538 * Scripts can then use the speed command, with speed=100 meaning 10 Gbps and speed=400 meaning 40 Gbps
1539 * A script can query the prox "port info" command to find out the port link speed and know
1540 * at which rate to start. Note that virtio running on OVS reports 10 Gbps, so a script probably
1541 * also has to check the driver (as returned by the same "port info" command).
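/* Worked numbers: the 1250000000 bytes/sec used below corresponds to 10 Gbps
   (1.25e9 * 8 = 1e10 bits/sec), so "speed 100" maps to line rate on a 10 Gbps port
   and "speed 400" to 40 Gbps, independent of the negotiated link speed. */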
1543 struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
1544 token_time_init(&task->token_time, &tt_cfg);
1546 init_task_gen_seeds(task);
1548 task->min_bulk_size = targ->min_bulk_size;
1549 task->max_bulk_size = targ->max_bulk_size;
1550 if (task->min_bulk_size < 1)
1551 task->min_bulk_size = 1;
1552 if (task->max_bulk_size < 1)
1553 task->max_bulk_size = 64;
1554 PROX_PANIC(task->max_bulk_size > 64, "max_bulk_size must not be higher than 64\n");
1555 PROX_PANIC(task->max_bulk_size < task->min_bulk_size, "max_bulk_size must be >= min_bulk_size\n");
1557 task->pkt_count = -1;
1558 task->lat_enabled = targ->lat_enabled;
1559 task->runtime_flags = targ->runtime_flags;
1560 PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat_pos or accur_pos configured but latency is not enabled\n");
1562 task->generator_id = targ->generator_id;
1563 plog_info("\t\tGenerator id = %d\n", task->generator_id);
1565 // Allocate array holding bytes to tsc for supported frame sizes
1566 task->bytes_to_tsc = prox_zmalloc(task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]), task->socket_id);
1567 PROX_PANIC(task->bytes_to_tsc == NULL,
1568 "Failed to allocate %u bytes (in huge pages) for bytes_to_tsc\n", task->max_frame_size);
1570 // task->port->max_link_speed reports the maximum, non-negotiated link speed in Mbps e.g. 40k for a 40 Gbps NIC.
1571 // It can be UINT32_MAX (virtual devices or not supported by DPDK < 16.04)
1572 uint64_t bytes_per_hz = UINT64_MAX;
1573 if ((task->port) && (task->port->max_link_speed != UINT32_MAX)) {
1574 bytes_per_hz = task->port->max_link_speed * 125000L;
1575 plog_info("\t\tPort %u: max link speed is %ld Mbps\n",
1576 (uint8_t)(task->port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
1578 // There are cases where the hz estimate might be slightly over-estimated
1579 // This results in too much extrapolation
1580 // Only account for 99% of the extrapolation to handle clocks with up to 1% error
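/* Illustration (hypothetical numbers): with a 2 GHz TSC and a 10 Gbps port,
   bytes_per_hz = 10000 * 125000 = 1.25e9 bytes/sec, so one 1538-byte wire frame maps
   to about 2e9 * 1538 * 0.99 / 1.25e9 ~= 2436 cycles in bytes_to_tsc[1538]. */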
1581 for (unsigned int i = 0; i < task->max_frame_size * MAX_PKT_BURST ; i++) {
1582 if (bytes_per_hz == UINT64_MAX)
1583 task->bytes_to_tsc[i] = 0;
1585 task->bytes_to_tsc[i] = (task->hz * i * 0.99) / bytes_per_hz;
1588 task->imix_nb_pkts = targ->imix_nb_pkts;
1589 for (uint32_t i = 0; i < targ->imix_nb_pkts; i++) {
1590 task->imix_pkt_sizes[i] = targ->imix_pkt_sizes[i];
1592 if (!strcmp(targ->pcap_file, "")) {
1593 plog_info("\t\tUsing inline definition of a packet\n");
1594 task_init_gen_load_pkt_inline(task, targ);
1596 plog_info("\t\tLoading from pcap %s\n", targ->pcap_file);
1597 task_init_gen_load_pcap(task, targ);
1600 PROX_PANIC(((targ->nb_txrings == 0) && (targ->nb_txports == 0)), "Gen mode requires a tx ring or a tx port");
1601 if ((targ->flags & DSF_KEEP_SRC_MAC) == 0) {
1602 task->flags |= TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC;
1603 memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(prox_rte_ether_addr));
1604 for (uint32_t i = 0; i < task->n_pkts; ++i) {
1605 rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
1608 for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
1609 PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
1610 "Failed to add random\n");
1614 static struct task_init task_init_gen = {
1616 .init = init_task_gen,
1617 .handle = handle_gen_bulk,
1619 .early_init = init_task_gen_early,
1621 // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
1622 // vector mode is used by DPDK, resulting (theoretically) in higher performance.
1623 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
1625 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1627 .size = sizeof(struct task_gen)
1630 static struct task_init task_init_gen_l3 = {
1632 .sub_mode_str = "l3",
1633 .init = init_task_gen,
1634 .handle = handle_gen_bulk,
1636 .early_init = init_task_gen_early,
1638 // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
1639 // vector mode is used by DPDK, resulting (theoretically) in higher performance.
1640 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
1642 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1644 .size = sizeof(struct task_gen)
1647 /* This mode uses time stamps in the pcap file */
1648 static struct task_init task_init_gen_pcap = {
1650 .sub_mode_str = "pcap",
1651 .init = init_task_gen_pcap,
1652 .handle = handle_gen_pcap_bulk,
1653 .start = start_pcap,
1654 .early_init = init_task_gen_early,
1656 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
1658 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1660 .size = sizeof(struct task_gen_pcap)
1663 __attribute__((constructor)) static void reg_task_gen(void)
1665 reg_task(&task_init_gen);
1666 reg_task(&task_init_gen_l3);
1667 reg_task(&task_init_gen_pcap);