2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
20 #include <rte_cycles.h>
21 #include <rte_version.h>
22 #include <rte_byteorder.h>
23 #include <rte_ether.h>
24 #include <rte_hash_crc.h>
25 #include <rte_malloc.h>
27 #include "prox_shared.h"
29 #include "prox_malloc.h"
30 #include "handle_gen.h"
31 #include "handle_lat.h"
32 #include "task_init.h"
33 #include "task_base.h"
34 #include "prox_port_cfg.h"
39 #include "mbuf_utils.h"
41 #include "prox_cksum.h"
43 #include "prox_assert.h"
45 #include "token_time.h"
46 #include "local_mbuf.h"
49 #include "handle_master.h"
/* Split a 32-bit IPv4 address into its four bytes, lowest-order byte first,
   for use with a "%u.%u.%u.%u" format string.
   Every use of x is parenthesized so the macro stays correct when the
   argument is an expression (CERT PRE01-C); the original expanded `x & 0xff`
   and `x >> 24` unparenthesized, which mis-binds for arguments like `a + b`
   or `a | b`. */
#define IP4(x) ((x) & 0xff), (((x) >> 8) & 0xff), (((x) >> 16) & 0xff), (((x) >> 24) & 0xff)

/* do_panic argument value: log the error and return instead of aborting. */
#define DO_NOT_PANIC 0

/* pcap argument value of task_gen_allocate_templates(): templates come from
   an inline/config definition, not from a pcap file. */
#define NOT_FROM_PCAP 0

#define TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC 1
// Fill one mbuf from a packet template: set both pkt_len and data_len to the
// template length, then copy the template bytes into the mbuf data area
// (pkt is expected to point at the mbuf's data start).
68 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
70 const uint32_t pkt_size = pkt_template->len;
72 rte_pktmbuf_pkt_len(mbuf) = pkt_size;
73 rte_pktmbuf_data_len(mbuf) = pkt_size;
75 rte_memcpy(pkt, pkt_template->buf, pkt_template->len);
// Per-task state for the pcap-replay generator mode.
// NOTE(review): this view is elided (embedded line numbers jump); fields used
// by handle_gen_pcap_bulk() below (pkt_idx, n_pkts, loop, last_tsc, proto_tsc)
// are not visible here — confirm against the full file.
78 struct task_gen_pcap {
79 struct task_base base;
81 struct local_mbuf local_mbuf;
83 struct pkt_template *proto;
// Per-task state for the packet generator mode.
// NOTE(review): the opening `struct task_gen {` line and several members are
// elided from this view (embedded line numbers jump).
92 struct task_base base;
94 struct token_time token_time; /* token bucket used for byte-rate limiting */
95 struct local_mbuf local_mbuf;
96 struct pkt_template *pkt_template; /* packet templates used at runtime */
97 uint64_t write_duration_estimate; /* how long it took previously to write the time stamps in the packets */
98 uint64_t earliest_tsc_next_pkt;
99 uint64_t new_rate_bps; /* requested rate; applied by task_gen_update_config() */
100 uint64_t pkt_queue_index; /* monotonically increasing id of sent packets */
101 uint32_t n_pkts; /* number of packet templates currently active */
102 uint32_t orig_n_pkts; /* number of templates before IMIX expansion */
103 uint32_t pkt_idx; /* current packet from pcap */
104 uint32_t pkt_count; /* how many pakets to generate; (uint32_t)-1 = unlimited */
105 uint32_t max_frame_size;
106 uint32_t runtime_flags;
108 uint16_t packet_id_pos; /* offset of the unique id in the packet; 0 = disabled */
113 uint8_t generator_id;
114 uint8_t n_rands; /* number of randoms */
115 uint8_t min_bulk_size;
116 uint8_t max_bulk_size;
118 uint8_t runtime_checksum_needed; /* set when a written field lands past L2 */
121 uint32_t rand_mask; /* since the random vals are uniform, masks don't introduce bias */
122 uint32_t fixed_bits; /* length of each random (max len = 4) */
123 uint16_t rand_offset; /* each random has an offset*/
124 uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
126 uint64_t accur[ACCURACY_WINDOW]; /* ring of tx-accuracy samples, indexed by pkt_queue_index */
127 uint64_t pkt_tsc_offset[64]; /* per-packet tsc offset within one bulk (max bulk = 64) */
128 struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
129 prox_rte_ether_addr src_mac;
131 uint8_t cksum_offload;
132 struct prox_port_cfg *port;
133 uint64_t *bytes_to_tsc; /* lookup table: wire bytes -> tsc cycles, see bytes_to_tsc() */
134 uint32_t imix_pkt_sizes[MAX_IMIX_PKTS];
135 uint32_t imix_nb_pkts;
136 uint32_t new_imix_nb_pkts; /* pending IMIX change, applied by task_gen_update_config() */
137 } __rte_cache_aligned;
// Return the IPv4 header length in bytes, decoded from version_ihl.
// Fast path: 0x45 (IPv4, IHL=5, no options) -> 20 bytes.
// NOTE(review): the body of the version!=4 branch (its return value) is
// elided from this view.
139 static inline uint8_t ipv4_get_hdr_len(prox_rte_ipv4_hdr *ip)
141 /* Optimize for common case of IPv4 header without options. */
142 if (ip->version_ihl == 0x45)
143 return sizeof(prox_rte_ipv4_hdr);
144 if (unlikely(ip->version_ihl >> 4 != 4)) {
145 plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
148 return (ip->version_ihl & 0xF) * 4; /* IHL is in 32-bit words */
// Determine the L2 and L3 header lengths of a packet buffer, skipping any
// stacked VLAN (802.1Q / 802.1ad) tags. `len` bounds the parse.
// NOTE(review): the switch cases on ether_type (MPLS, IPv4, IPv6, ...) and
// the l2_len increment inside the VLAN loop are elided from this view.
151 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
153 *l2_len = sizeof(prox_rte_ether_hdr);
155 prox_rte_vlan_hdr *vlan_hdr;
156 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
157 prox_rte_ipv4_hdr *ip;
158 uint16_t ether_type = eth_hdr->ether_type;
// Walk through stacked VLAN tags until a non-VLAN ether_type is found.
161 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(prox_rte_vlan_hdr) < len)) {
162 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + *l2_len);
164 ether_type = vlan_hdr->eth_proto;
167 // No L3 cksum offload for IPv6, but TODO L4 offload
168 // ETYPE_EoGRE CRC not implemented yet
170 switch (ether_type) {
184 plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
// For IPv4 payloads, l3_len is derived from the IHL field.
189 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + *l2_len);
190 *l3_len = ipv4_get_hdr_len(ip);
// Recompute the IP (and, inside prox_ip_udp_cksum, L4) checksum of one packet
// using the L2/L3 lengths cached in its template. cksum_offload selects
// hardware offload vs software calculation.
194 static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_template *pkt_template, int cksum_offload)
196 uint16_t l2_len = pkt_template->l2_len;
197 uint16_t l3_len = pkt_template->l3_len;
200 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr*)(hdr + l2_len);
201 prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
// Re-arm the rate-limiting token bucket with the currently requested rate
// (new_rate_bps) and restart it from the current TSC with an empty bucket.
205 static void task_gen_reset_token_time(struct task_gen *task)
207 token_time_set_bpp(&task->token_time, task->new_rate_bps);
208 token_time_reset(&task->token_time, rte_rdtsc(), 0);
// Consume `send_bulk` packets from the remaining packet budget.
// (uint32_t)-1 means "unlimited" and is left untouched.
// NOTE(review): the branch handling send_bulk > pkt_count (clamping to 0)
// is elided from this view.
211 static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk)
213 if (task->pkt_count == (uint32_t)-1)
216 if (task->pkt_count >= send_bulk)
217 task->pkt_count -= send_bulk;
// Replay packets from a loaded pcap at their recorded inter-packet times.
// First determines how many packets are "due" (send_bulk) by accumulating
// per-packet tsc deltas against the current time, then takes that many mbufs
// from the local pool, fills them from the templates and transmits them.
// NOTE(review): several lines (loop wrap-around handling, send_bulk
// accumulation, early returns) are elided from this view.
223 static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf, uint16_t n_pkts)
225 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
226 uint64_t now = rte_rdtsc();
227 uint64_t send_bulk = 0;
228 uint32_t pkt_idx_tmp = task->pkt_idx;
230 if (pkt_idx_tmp == task->n_pkts) {
231 PROX_ASSERT(task->loop);
// Count how many of the next (up to 64) packets are due at `now`.
235 for (uint16_t j = 0; j < 64; ++j) {
236 uint64_t tsc = task->proto_tsc[pkt_idx_tmp];
237 if (task->last_tsc + tsc <= now) {
238 task->last_tsc += tsc;
241 if (pkt_idx_tmp == task->n_pkts) {
252 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
253 if (new_pkts == NULL)
256 for (uint16_t j = 0; j < send_bulk; ++j) {
257 struct rte_mbuf *next_pkt = new_pkts[j];
258 struct pkt_template *pkt_template = &task->proto[task->pkt_idx];
259 uint8_t *hdr = rte_pktmbuf_mtod(next_pkt, uint8_t *);
261 pkt_template_init_mbuf(pkt_template, next_pkt, hdr);
264 if (task->pkt_idx == task->n_pkts) {
272 return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
275 static inline uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
277 return task->bytes_to_tsc[bytes];
280 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
282 return pkt_idx + 1 >= task->n_pkts? 0 : pkt_idx + 1;
285 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
287 return (task->pkt_idx + offset) % task->n_pkts;
// Compute how many packets may be sent right now, bounded by max_bulk_size,
// the remaining pkt_count budget and the bytes available in the token
// bucket. Returns the bulk size (0 if below min_bulk_size) and the wire
// bytes it would consume via *total_bytes.
// NOTE(review): the send_bulk increment and the loop-exit breaks are elided
// from this view.
290 static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *total_bytes)
292 /* The biggest bulk we allow to send is task->max_bulk_size
293 packets. The max bulk size can also be limited by the
294 pkt_count field. At the same time, we are rate limiting
295 based on the specified speed (in bytes per second) so token
296 bucket based rate limiting must also be applied. The
297 minimum bulk size is also constrained. If the calculated
298 bulk size is less then the minimum, then don't send
301 const uint32_t min_bulk = task->min_bulk_size;
302 uint32_t max_bulk = task->max_bulk_size;
304 if (task->pkt_count != (uint32_t)-1 && task->pkt_count < max_bulk) {
305 max_bulk = task->pkt_count;
308 uint32_t send_bulk = 0;
309 uint32_t pkt_idx_tmp = task->pkt_idx;
310 uint32_t would_send_bytes = 0;
314 * TODO - this must be improved to take into account the fact that, after applying randoms
315 * The packet can be replaced by an ARP
317 for (uint16_t j = 0; j < max_bulk; ++j) {
318 struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp];
319 pkt_size = pktpl->len;
320 uint32_t pkt_len = pkt_len_to_wire_size(pkt_size);
// Stop as soon as the token bucket cannot cover the next packet.
321 if (pkt_len + would_send_bytes > task->token_time.bytes_now)
324 pkt_idx_tmp = task_gen_next_pkt_idx(task, pkt_idx_tmp);
327 would_send_bytes += pkt_len;
330 if (send_bulk < min_bulk)
332 *total_bytes = would_send_bytes;
// Write each configured random field into one packet: draw a uniform random,
// apply the field's mask and fixed bits, byte-swap to big-endian and copy the
// low-order rand_len bytes to the field's offset in the packet.
336 static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr)
338 uint32_t ret, ret_tmp;
340 for (uint16_t i = 0; i < task->n_rands; ++i) {
341 ret = random_next(&task->rand[i].state);
342 ret_tmp = (ret & task->rand[i].rand_mask) | task->rand[i].fixed_bits;
344 ret_tmp = rte_bswap32(ret_tmp);
345 /* At this point, the lower order bytes (BE) contain
346 the generated value. The address where the values
347 of interest starts is at ret_tmp + 4 - rand_len. */
348 uint8_t *pret_tmp = (uint8_t*)&ret_tmp;
349 rte_memcpy(hdr + task->rand[i].rand_offset, pret_tmp + 4 - task->rand[i].rand_len, task->rand[i].rand_len);
// Apply the configured random fields to every packet header in the bulk.
// NOTE(review): the early return when n_rands == 0 appears to be elided
// from this view.
353 static void task_gen_apply_all_random_fields(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
358 for (uint16_t i = 0; i < count; ++i)
359 task_gen_apply_random_fields(task, pkt_hdr[i]);
// Store a 32-bit accuracy value at the configured offset in the packet.
// NOTE(review): this is a raw unaligned-capable store through a cast pointer;
// accur_pos alignment is not checked here — presumably guaranteed by config.
362 static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
364 *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
// Write the generator signature into a packet template at sig_pos.
// NOTE(review): the guard for "signature disabled" (sig_pos unset) appears
// to be elided from this view — confirm against the full file.
367 static void task_gen_apply_sig(struct task_gen *task, struct pkt_template *dst)
370 *(uint32_t *)(dst->buf + task->sig_pos) = task->sig;
// Write the stored tx-accuracy samples into the outgoing packets; no-op when
// accuracy reporting is disabled (accur_pos == 0).
373 static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
375 if (!task->accur_pos)
378 /* The accuracy of task->pkt_queue_index - ACCURACY_WINDOW is stored in
379 packet task->pkt_queue_index. The ID modulo ACCURACY_WINDOW is the
381 for (uint16_t j = 0; j < count; ++j) {
382 uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & (ACCURACY_WINDOW - 1)];
383 task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
// Copy a unique packet id into the packet at packet_id_pos.
// NOTE(review): the actual assignment through `dst` is elided from this view;
// only the destination pointer computation is visible.
387 static void task_gen_apply_unique_id(struct task_gen *task, uint8_t *pkt_hdr, const struct unique_id *id)
389 struct unique_id *dst = (struct unique_id *)(pkt_hdr + task->packet_id_pos);
// Stamp each packet in the bulk with a fresh unique id (generator id +
// monotonically increasing pkt_queue_index); no-op when packet_id_pos == 0.
394 static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
396 if (!task->packet_id_pos)
399 for (uint16_t i = 0; i < count; ++i) {
401 unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
402 task_gen_apply_unique_id(task, pkt_hdr[i], &id);
// Recompute checksums for a bulk of already-built packets, but only when TX
// CRC handling is enabled and some written field actually invalidated the
// template checksum (runtime_checksum_needed).
406 static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
408 if (!(task->runtime_flags & TASK_TX_CRC))
411 if (!task->runtime_checksum_needed)
// pkt_idx was already advanced by task_gen_build_packets(), so step back
// `count` templates to match the packets in this bulk.
414 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
415 for (uint16_t i = 0; i < count; ++i) {
416 struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
417 checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
418 pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
// Deduct the consumed wire bytes from the token bucket. If a full max-size
// burst was sent, clamp the remaining tokens so a backlog does not build up.
422 static void task_gen_consume_tokens(struct task_gen *task, uint32_t tokens, uint32_t send_count)
424 /* If max burst has been sent, we can't keep up so just assume
425 that we can (leaving a "gap" in the packet stream on the
427 task->token_time.bytes_now -= tokens;
428 if (send_count == task->max_bulk_size && task->token_time.bytes_now > tokens) {
429 task->token_time.bytes_now = tokens;
// Duration (in tsc) of a bulk of `count` packets: the offset of the last
// packet plus — unless NO_EXTRAPOLATION is defined — the transmit time of
// that last packet itself.
433 static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t count)
// pkt_idx already points past the bulk, so -1 is the last packet sent.
435 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
436 struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
437 uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
438 #ifdef NO_EXTRAPOLATION
439 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1];
441 uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
442 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
445 return bulk_duration;
// Write a (monotonically increasing) tx time stamp into each packet of the
// bulk at lat_pos, then busy-wait until the estimated tx time is reached so
// the stamps are valid. Returns the tsc read just before tx (0 when latency
// stamping is disabled).
// NOTE(review): several lines (the early return body, delta_t zero branch,
// loop braces) are elided from this view.
448 static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
450 if (!task->lat_enabled)
453 uint64_t tx_tsc, delta_t;
454 uint64_t tsc_before_tx = 0;
456 /* Just before sending the packets, apply the time stamp
457 relative to when the first packet will be sent. The first
458 packet will be sent now. The time is read for each packet
459 to reduce the error towards the actual time the packet will
461 uint64_t write_tsc_after, write_tsc_before;
463 write_tsc_before = rte_rdtsc();
465 /* The time it took previously to write the time stamps in the
466 packets is used as an estimate for how long it will take to
467 write the time stamps now. The estimated time at which the
468 packets will actually be sent will be at tx_tsc. */
469 tx_tsc = write_tsc_before + task->write_duration_estimate;
471 /* The offset delta_t tracks the difference between the actual
472 time and the time written in the packets. Adding the offset
473 to the actual time insures that the time written in the
474 packets is monotonically increasing. At the same time,
475 simply sleeping until delta_t is zero would leave a period
476 of silence on the line. The error has been introduced
477 earlier, but the packets have already been sent. */
479 /* This happens typically if previous bulk was delayed
480 by an interrupt e.g. (with Time in nsec)
481 Time x: sleep 4 microsec
482 Time x+4000: send 64 packets (64 packets as 4000 nsec, w/ 10Gbps 64 bytes)
483 Time x+5000: send 16 packets (16 packets as 1000 nsec)
484 When we send the 16 packets, the 64 ealier packets are not yet
486 if (tx_tsc < task->earliest_tsc_next_pkt)
487 delta_t = task->earliest_tsc_next_pkt - tx_tsc;
// Stamp each packet with its projected tx time, truncated to the
// configured latency accuracy.
491 for (uint16_t i = 0; i < count; ++i) {
492 uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
493 const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
494 *pos = pkt_tsc >> LATENCY_ACCURACY;
497 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
498 task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
499 write_tsc_after = rte_rdtsc();
500 task->write_duration_estimate = write_tsc_after - write_tsc_before;
502 /* Make sure that the time stamps that were written
503 are valid. The offset must be taken into account */
505 tsc_before_tx = rte_rdtsc();
506 } while (tsc_before_tx < tx_tsc);
508 return tsc_before_tx;
// Record, for each packet just sent, how late the actual tx was relative to
// the time stamp written into it. Samples go into the accur ring so a later
// bulk can embed them in packets (see task_gen_apply_all_accur_pos).
511 static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint64_t tsc_before_tx)
513 if (!task->accur_pos)
516 uint64_t accur = rte_rdtsc() - tsc_before_tx;
// pkt_queue_index was already advanced for this bulk; step back `count`.
517 uint64_t first_accuracy_idx = task->pkt_queue_index - count;
519 for (uint32_t i = 0; i < count; ++i) {
520 uint32_t accuracy_idx = (first_accuracy_idx + i) & (ACCURACY_WINDOW - 1);
522 task->accur[accuracy_idx] = accur;
// Prefetch the mbuf structures, resolve each packet's data pointer into
// pkt_hdr[], then prefetch the packet data itself. Three separate passes so
// prefetches are issued ahead of the dependent loads.
526 static void task_gen_load_and_prefetch(struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
528 for (uint16_t i = 0; i < count; ++i)
529 rte_prefetch0(mbufs[i]);
530 for (uint16_t i = 0; i < count; ++i)
531 pkt_hdr[i] = rte_pktmbuf_mtod(mbufs[i], uint8_t *);
532 for (uint16_t i = 0; i < count; ++i)
533 rte_prefetch0(pkt_hdr[i]);
// Fill each mbuf of the bulk from the current template, advance the template
// index, and (when latency stamping is on) record each packet's tsc offset
// within the bulk, derived from the wire bytes sent before it.
// NOTE(review): `pktpl` duplicates `pkt_template` and is unused in the lines
// visible here; elided lines (numbers jump) may use it — verify before
// removing.
536 static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
538 uint64_t will_send_bytes = 0;
540 for (uint16_t i = 0; i < count; ++i) {
541 struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
542 struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
543 pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
544 prox_rte_ether_hdr *hdr = (prox_rte_ether_hdr *)pkt_hdr[i];
545 if (task->lat_enabled) {
546 #ifdef NO_EXTRAPOLATION
547 task->pkt_tsc_offset[i] = 0;
549 task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
551 will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
553 task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
// Allocate the runtime and original template arrays plus a max_frame_size
// buffer for every template. On failure either panics (do_panic) or logs and
// returns an error; `pcap` only selects the wording of the error messages.
// NOTE(review): the error-return statements after each plog_err_or_panic and
// the final return are elided from this view.
557 static int task_gen_allocate_templates(struct task_gen *task, uint32_t orig_nb_pkts, uint32_t nb_pkts, int do_panic, int pcap)
559 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
560 size_t orig_mem_size = orig_nb_pkts * sizeof(*task->pkt_template);
561 task->pkt_template = prox_zmalloc(mem_size, task->socket_id);
562 task->pkt_template_orig = prox_zmalloc(orig_mem_size, task->socket_id);
564 if (task->pkt_template == NULL || task->pkt_template_orig == NULL) {
565 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for %s\n", mem_size, pcap ? "pcap file":"packet template");
569 for (size_t i = 0; i < orig_nb_pkts; i++) {
570 task->pkt_template_orig[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
571 if (task->pkt_template_orig[i].buf == NULL) {
572 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
576 for (size_t i = 0; i < nb_pkts; i++) {
577 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
578 if (task->pkt_template[i].buf == NULL) {
579 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for %s\n", task->max_frame_size, pcap ? "packet from pcap": "packet");
// Resize the runtime template array for a new IMIX configuration: free the
// buffers of templates beyond the new count, realloc the array itself, then
// allocate buffers only for templates that did not exist before.
// Uses rte_realloc_socket correctly (result checked before overwriting).
// NOTE(review): error-return statements and the final return are elided from
// this view.
586 static int task_gen_reallocate_templates(struct task_gen *task, uint32_t nb_pkts, int do_panic)
588 // Need to free up bufs allocated in previous (longer) imix
589 for (size_t i = nb_pkts; i < task->n_pkts; i++) {
590 if (task->pkt_template[i].buf) {
591 rte_free(task->pkt_template[i].buf);
592 task->pkt_template[i].buf = NULL;
596 size_t mem_size = nb_pkts * sizeof(*task->pkt_template);
597 struct pkt_template *ptr;
598 // re-allocate memory for new pkt_template (this might allocate additional memory or free up some...)
599 if ((ptr = rte_realloc_socket(task->pkt_template, mem_size, RTE_CACHE_LINE_SIZE, task->socket_id)) != NULL) {
600 task->pkt_template = ptr;
602 plog_err_or_panic(do_panic, "Failed to allocate %lu bytes (in huge pages) for packet template for IMIX\n", mem_size);
606 // Need to allocate bufs for new template but no need to reallocate for existing ones
607 for (size_t i = task->n_pkts; i < nb_pkts; ++i) {
608 task->pkt_template[i].buf = prox_zmalloc(task->max_frame_size, task->socket_id);
609 if (task->pkt_template[i].buf == NULL) {
610 plog_err_or_panic(do_panic, "Failed to allocate %u bytes (in huge pages) for packet %zd in IMIX\n", task->max_frame_size, i);
// Validate one template length against [eth+ipv4 header, max_frame_size].
// With do_panic the checks abort via PROX_PANIC; otherwise they log and
// return an error.
// NOTE(review): the `if (do_panic)`/`else` split and the return statements
// are elided from this view.
617 static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
619 const uint16_t min_len = sizeof(prox_rte_ether_hdr) + sizeof(prox_rte_ipv4_hdr);
620 const uint16_t max_len = task->max_frame_size;
623 PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
624 PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
625 PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
629 plog_err("Invalid packet size length (no packet defined?)\n");
632 if (pkt_size > max_len) {
// Distinguish "jumbo frames disabled" from a plain MTU violation.
633 if (pkt_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE - 4)
634 plog_err("pkt_size too high and jumbo frames disabled\n");
636 plog_err("pkt_size out of range (must be <= (mtu=%u))\n", max_len);
639 if (pkt_size < min_len) {
640 plog_err("pkt_size out of range (must be >= %u)\n", min_len);
// Check that every enabled write-position (latency stamp, packet id,
// accuracy) fits inside a packet of pkt_size bytes. Panics or logs+errors
// depending on do_panic.
// NOTE(review): the `if (do_panic)` lines and return statements are elided
// from this view.
647 static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
649 if (task->lat_enabled) {
650 uint32_t pos_beg = task->lat_pos;
651 uint32_t pos_end = task->lat_pos + 3U; /* 4-byte stamp */
654 PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
655 pos_beg, pos_end, pkt_size);
656 else if (pkt_size <= pos_end) {
657 plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
661 if (task->packet_id_pos) {
662 uint32_t pos_beg = task->packet_id_pos;
663 uint32_t pos_end = task->packet_id_pos + 4U; /* 5-byte unique id */
666 PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
667 pos_beg, pos_end, pkt_size);
668 else if (pkt_size <= pos_end) {
669 plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
673 if (task->accur_pos) {
674 uint32_t pos_beg = task->accur_pos;
675 uint32_t pos_end = task->accur_pos + 3U; /* 4-byte accuracy */
678 PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u-%u, but packet size is %u bytes\n",
679 pos_beg, pos_end, pkt_size);
680 else if (pkt_size <= pos_end) {
681 plog_err("Writing accuracy at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
// Expand the original templates into nb_pkt_sizes IMIX variants: template
// k = j*n_orig_pkts + i is original packet i resized to pkt_sizes[j], with
// the IPv4 total_length, IP checksum and UDP/TCP length+checksum fixed up
// to match the new size. Non-IP templates (l2_len == 0) are skipped.
688 static int task_gen_set_eth_ip_udp_sizes(struct task_gen *task, uint32_t n_orig_pkts, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
692 prox_rte_ipv4_hdr *ip;
693 struct pkt_template *template;
695 for (size_t j = 0; j < nb_pkt_sizes; ++j) {
696 for (size_t i = 0; i < n_orig_pkts; ++i) {
697 k = j * n_orig_pkts + i;
698 template = &task->pkt_template[k];
699 template->len = pkt_sizes[j];
700 rte_memcpy(template->buf, task->pkt_template_orig[i].buf, pkt_sizes[j]);
701 parse_l2_l3_len(template->buf, &template->l2_len, &template->l3_len, template->len);
702 if (template->l2_len == 0)
704 ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
705 ip->total_length = rte_bswap16(pkt_sizes[j] - template->l2_len);
706 l4_len = pkt_sizes[j] - template->l2_len - template->l3_len;
707 ip->hdr_checksum = 0;
708 prox_ip_cksum_sw(ip);
710 if (ip->next_proto_id == IPPROTO_UDP) {
711 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
712 udp->dgram_len = rte_bswap16(l4_len);
713 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
714 } else if (ip->next_proto_id == IPPROTO_TCP) {
715 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
716 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
// Switch to the pending IMIX configuration: reallocate templates if the
// template count changes, then rebuild all sizes/checksums.
// NOTE(review): the pkt_idx reset when it falls beyond the new n_pkts is
// elided from this view.
723 static int task_gen_apply_imix(struct task_gen *task, int do_panic)
725 struct pkt_template *ptr;
727 task->imix_nb_pkts = task->new_imix_nb_pkts;
728 uint32_t n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
730 if ((n_pkts != task->n_pkts) && ((rc = task_gen_reallocate_templates(task, n_pkts, do_panic)) < 0))
733 task->n_pkts = n_pkts;
734 if (task->pkt_idx >= n_pkts)
736 task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
// Apply pending runtime configuration changes (rate and IMIX mix) from the
// fast path; called once per handle_gen_bulk() invocation.
740 static void task_gen_update_config(struct task_gen *task)
742 if (task->token_time.cfg.bpp != task->new_rate_bps)
743 task_gen_reset_token_time(task);
744 if (task->new_imix_nb_pkts)
745 task_gen_apply_imix(task, DO_NOT_PANIC);
746 task->new_imix_nb_pkts = 0;
// Recursively enumerate every concrete value reachable through `mask`
// (each masked bit tried as 0 and 1, OR-ed with the fixed bits) and register
// the resulting IPv4 address with the master/control-plane task.
// NOTE(review): the conditionals guarding the recursion and the base case
// are elided from this view (line numbers jump).
749 static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
751 struct task_base *tbase = (struct task_base *)task;
753 build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits);
755 build_value(task, mask >> 1, bit_pos + 1, val | (1 << bit_pos), fixed_bits);
758 register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
// Walk every packet template, locate its IPv4 source address (skipping VLAN
// and, apparently, MPLS encapsulation) and register it with the control
// plane. For templates whose randomized fields overlap the source address,
// enumerate all possible addresses via build_value().
// NOTE(review): heavily elided view — the l2_len increments, the MPLS branch
// body, several guards and closing braces are missing; comments below only
// describe what the visible lines establish.
761 static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
763 struct task_base *tbase = (struct task_base *)task;
768 for (uint32_t i = 0; i < task->n_pkts; ++i) {
769 struct pkt_template *pktpl = &task->pkt_template[i];
770 unsigned int ip_src_pos = 0;
772 unsigned int l2_len = sizeof(prox_rte_ether_hdr);
774 uint8_t *pkt = pktpl->buf;
775 prox_rte_ether_hdr *eth_hdr = (prox_rte_ether_hdr*)pkt;
776 uint16_t ether_type = eth_hdr->ether_type;
777 prox_rte_vlan_hdr *vlan_hdr;
// Skip stacked VLAN tags.
780 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(prox_rte_vlan_hdr) < pktpl->len)) {
781 vlan_hdr = (prox_rte_vlan_hdr *)(pkt + l2_len);
783 ether_type = vlan_hdr->eth_proto;
785 if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
789 if ((ether_type != ETYPE_IPv4) && !maybe_ipv4)
792 prox_rte_ipv4_hdr *ip = (prox_rte_ipv4_hdr *)(pkt + l2_len);
793 PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
795 // Even if IPv4 header contains options, options are after ip src and dst
796 ip_src_pos = l2_len + sizeof(prox_rte_ipv4_hdr) - 2 * sizeof(uint32_t);
797 uint32_t *ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
798 plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
799 register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
// If a random field overlaps the source address, enumerate the
// addresses it can generate and register each of them.
801 for (int j = 0; j < task->n_rands; j++) {
802 offset = task->rand[j].rand_offset;
803 len = task->rand[j].rand_len;
804 mask = task->rand[j].rand_mask;
805 fixed = task->rand[j].fixed_bits;
806 plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
807 if ((offset < ip_src_pos + 4) && (offset + len >= ip_src_pos)) {
808 if (offset >= ip_src_pos) {
809 int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1;
810 mask = mask & ip_src_mask;
811 fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask);
812 build_value(task, mask, 0, 0, fixed);
814 int32_t bits = ((ip_src_pos + 4 - offset - len) * 8);
816 fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
817 build_value(task, mask, 0, 0, fixed);
// Fast-path entry point of the generator task: apply pending config, update
// the token bucket, compute the bulk size, build/randomize/stamp/checksum
// the packets and transmit them, then roll back queue index and timing
// estimates for any packets the tx path dropped.
// NOTE(review): early returns (zero bulk, no rate, mbuf shortage) and some
// braces are elided from this view.
824 static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
826 struct task_gen *task = (struct task_gen *)tbase;
827 uint8_t out[MAX_PKT_BURST] = {0};
832 task_gen_update_config(task);
834 if (task->pkt_count == 0) {
835 task_gen_reset_token_time(task);
838 if (!task->token_time.cfg.bpp)
841 token_time_update(&task->token_time, rte_rdtsc());
843 uint32_t would_send_bytes;
844 uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
848 task_gen_take_count(task, send_bulk);
849 task_gen_consume_tokens(task, would_send_bytes, send_bulk);
851 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
852 if (new_pkts == NULL)
854 uint8_t *pkt_hdr[MAX_RING_BURST];
856 task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
857 task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
858 task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
859 task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
860 task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
862 uint64_t tsc_before_tx;
864 tsc_before_tx = task_gen_write_latency(task, pkt_hdr, send_bulk);
865 task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
866 ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
867 task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
869 // If we failed to send some packets, we need to do some clean-up:
872 // We need re-use the packets indexes not being sent
873 // Hence non-sent packets will not be considered as lost by the receiver when it looks at
874 // packet ids. This should also increase the percentage of packets used for latency measurements
875 task->pkt_queue_index -= ret;
877 // In case of failures, the estimate about when we can send next packet (earliest_tsc_next_pkt) is wrong
878 // This would result in under-estimated latency (up to 0 or negative)
879 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, ret);
880 task->earliest_tsc_next_pkt -= bulk_duration;
// Seed the PRNG state of every random-field slot of the task.
885 static void init_task_gen_seeds(struct task_gen *task)
887 for (size_t i = 0; i < sizeof(task->rand)/sizeof(task->rand[0]); ++i)
888 random_init_seed(&task->rand[i].state);
// Count the packets in an open pcap handle, tracking the largest frame seen
// in *max_frame_size, then seek the underlying file back to the first packet
// so the caller can re-read from the start.
// NOTE(review): the packet counter increment and the return statement are
// elided from this view.
891 static uint32_t pcap_count_pkts(pcap_t *handle, uint32_t *max_frame_size)
893 struct pcap_pkthdr header;
897 long pkt1_fpos = ftell(pcap_file(handle));
899 while ((buf = pcap_next(handle, &header))) {
900 if (header.len > *max_frame_size)
901 *max_frame_size = header.len;
904 int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
905 PROX_PANIC(ret2 != 0, "Failed to reset reading pcap file\n");
909 static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
911 uint64_t tot_inter_pkt = 0;
913 for (uint32_t i = 0; i < n; ++i)
914 tot_inter_pkt += time_stamp[i];
915 return (tot_inter_pkt + n / 2)/n;
// Read n_pkts packets from an open pcap into the template array `proto`,
// truncating frames larger than max_frame_size. When time_stamp is non-NULL,
// also convert each packet's capture time (relative to the first packet) to
// tsc, then post-process the array into inter-packet deltas; the missing
// last-to-first gap is filled with the average delta.
// NOTE(review): local declarations (buf, len), the first-timestamp setup and
// the return statement are elided from this view.
918 static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp, uint32_t max_frame_size)
920 struct pcap_pkthdr header;
924 for (uint32_t i = 0; i < n_pkts; ++i) {
925 buf = pcap_next(handle, &header);
927 PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
928 proto[i].len = header.len;
929 len = RTE_MIN(header.len, max_frame_size);
930 if (header.len > len)
931 plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
934 static struct timeval beg;
940 tv = tv_diff(&beg, &header.ts);
941 tv_to_tsc(&tv, time_stamp + i);
943 rte_memcpy(proto[i].buf, buf, len);
// Convert absolute times to inter-packet deltas, back to front.
946 if (time_stamp && n_pkts) {
947 for (uint32_t i = n_pkts - 1; i > 0; --i)
948 time_stamp[i] -= time_stamp[i - 1];
949 /* Since the handle function will loop the packets,
950 there is one time-stamp that is not provided by the
951 pcap file. This is the time between the last and
952 the first packet. This implementation takes the
953 average of the inter-packet times here. */
955 time_stamp[0] = avg_time_stamp(time_stamp + 1, n_pkts - 1);
// Validate every template's length; returns the first non-zero error from
// check_pkt_size (final success return is elided from this view).
961 static int check_all_pkt_size(struct task_gen *task, int do_panic)
964 for (uint32_t i = 0; i < task->n_pkts;++i) {
965 if ((rc = check_pkt_size(task, task->pkt_template[i].len, do_panic)) != 0)
// Validate that the configured write-positions fit in every template;
// returns the first non-zero error from check_fields_in_bounds (final
// success return is elided from this view).
971 static int check_all_fields_in_bounds(struct task_gen *task, int do_panic)
974 for (uint32_t i = 0; i < task->n_pkts;++i) {
975 if ((rc = check_fields_in_bounds(task, task->pkt_template[i].len, do_panic)) != 0)
// Re-derive the cached l2_len/l3_len of every template from its buffer
// contents (needed after template bytes change).
981 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
983 struct pkt_template *template;
985 for (size_t i = 0; i < task->n_pkts; ++i) {
986 template = &task->pkt_template[i];
987 parse_l2_l3_len(template->buf, &template->l2_len, &template->l3_len, template->len);
// Precompute the IP and UDP/TCP checksums of every IPv4 template, and decide
// whether checksums must still be recomputed per packet at runtime: that is
// only needed when a runtime-written field (latency stamp, accuracy, packet
// id) lands beyond the L2 header, i.e. inside the checksummed area.
991 static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
993 struct pkt_template *template;
994 prox_rte_ipv4_hdr *ip;
996 task->runtime_checksum_needed = 0;
997 for (size_t i = 0; i < task->n_pkts; ++i) {
998 template = &task->pkt_template[i];
// Non-IP templates (l2_len == 0 per parse_l2_l3_len) are skipped.
999 if (template->l2_len == 0)
1001 ip = (prox_rte_ipv4_hdr *)(template->buf + template->l2_len);
1003 ip->hdr_checksum = 0;
1004 prox_ip_cksum_sw(ip);
1005 uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
1007 if (ip->next_proto_id == IPPROTO_UDP) {
1008 prox_rte_udp_hdr *udp = (prox_rte_udp_hdr *)(((uint8_t *)ip) + template->l3_len);
1009 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
1010 } else if (ip->next_proto_id == IPPROTO_TCP) {
1011 prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
1012 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
1015 /* The current implementation avoids checksum
1016 calculation by determining that at packet
1017 construction time, no fields are applied that would
1018 require a recalculation of the checksum. */
1019 if (task->lat_enabled && task->lat_pos > template->l2_len)
1020 task->runtime_checksum_needed = 1;
1021 if (task->accur_pos > template->l2_len)
1022 task->runtime_checksum_needed = 1;
1023 if (task->packet_id_pos > template->l2_len)
1024 task->runtime_checksum_needed = 1;
// Recompute all derived template state: header lengths first, then the
// checksums that depend on them.
1028 static void task_gen_pkt_template_recalc_all(struct task_gen *task)
1030 task_gen_pkt_template_recalc_metadata(task);
1031 task_gen_pkt_template_recalc_checksum(task);
// Copy the original template lengths into every runtime template; the
// runtime array holds n_pkts / orig_n_pkts repetitions of the originals.
1034 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
1036 struct pkt_template *src, *dst;
1038 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1039 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1040 src = &task->pkt_template_orig[i];
1041 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1042 dst->len = src->len;
// Copy the original template bytes into every runtime template and re-apply
// the generator signature. Copies RTE_MAX(src->len, dst->len) bytes so a
// runtime template longer than its original is fully initialized (both
// buffers are max_frame_size, so this cannot overread).
1047 static void task_gen_reset_pkt_templates_content(struct task_gen *task)
1049 struct pkt_template *src, *dst;
1051 for (size_t j = 0; j < task->n_pkts / task->orig_n_pkts; ++j) {
1052 for (size_t i = 0; i < task->orig_n_pkts; ++i) {
1053 src = &task->pkt_template_orig[i];
1054 dst = &task->pkt_template[j * task->orig_n_pkts + i];
1055 memcpy(dst->buf, src->buf, RTE_MAX(src->len, dst->len));
1056 task_gen_apply_sig(task, dst);
// Rebuild the runtime templates from the originals: lengths, contents, then
// all derived metadata and checksums.
1061 static void task_gen_reset_pkt_templates(struct task_gen *task)
1063 task_gen_reset_pkt_templates_len(task);
1064 task_gen_reset_pkt_templates_content(task);
1065 task_gen_pkt_template_recalc_all(task);
/* Build the runtime packet templates from an inline packet definition
 * (targ->pkt_inline / targ->pkt_size) instead of a pcap file. */
static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
	/* A single original template; IMIX (when configured) replicates it
	 * at several packet sizes. */
	task->orig_n_pkts = 1;
	if (task->imix_nb_pkts == 0) {
		/* No IMIX: the single size comes from the pkt_size parameter. */
		task->imix_pkt_sizes[0] = targ->pkt_size;
	/* NOTE(review): this assignment belongs to the IMIX (else) branch —
	 * the branch delimiters are not visible in this view; confirm. */
	task->n_pkts = task->imix_nb_pkts;
	/* DO_PANIC: a bad configuration at init time is fatal. */
	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, NOT_FROM_PCAP);
	/* Copy the whole inline buffer; the effective length is set below. */
	rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, task->max_frame_size);
	task->pkt_template_orig[0].len = task->imix_pkt_sizes[0];
	task_gen_reset_pkt_templates(task);
	check_all_pkt_size(task, DO_PANIC);
	check_all_fields_in_bounds(task, DO_PANIC);
	// If IMIX was not specified then pkt_size is specified using pkt_size parameter or the length of pkt_inline
	// In that case, for backward compatibility, we do NOT adapt the length of IP and UDP to the length of the packet
	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Build the runtime packet templates by loading packets from the
 * configured pcap file (targ->pcap_file). Fatal on any error. */
static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
	char err[PCAP_ERRBUF_SIZE];
	uint32_t max_frame_size;
	pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
	PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
	/* First pass: count packets and find the largest frame in the file. */
	task->orig_n_pkts = pcap_count_pkts(handle, &max_frame_size);
	plogx_info("%u packets in pcap file '%s'; max frame size=%d\n", task->orig_n_pkts, targ->pcap_file, max_frame_size);
	/* Pick the more helpful of the two messages; the -4 presumably
	 * accounts for the Ethernet CRC included in ETHER_MAX_LEN — confirm. */
	PROX_PANIC(max_frame_size > task->max_frame_size,
		max_frame_size > PROX_RTE_ETHER_MAX_LEN + 2 * PROX_VLAN_TAG_SIZE -4 ?
		"pkt_size too high and jumbo frames disabled" : "pkt_size > mtu");
	/* Never load more than the configured packet count. */
	task->orig_n_pkts = RTE_MIN(task->orig_n_pkts, targ->n_pkts);
	if (task->imix_nb_pkts == 0) {
		task->n_pkts = task->orig_n_pkts;
	/* NOTE(review): IMIX (else) branch — every pcap packet is replicated
	 * at each IMIX size; branch delimiters not visible here, confirm. */
	task->n_pkts = task->imix_nb_pkts * task->orig_n_pkts;
	task_gen_allocate_templates(task, task->orig_n_pkts, task->n_pkts, DO_PANIC, FROM_PCAP);
	plogx_info("Loading %u packets from pcap\n", task->n_pkts);
	/* Second pass: read the packets into the original templates. */
	pcap_read_pkts(handle, targ->pcap_file, task->orig_n_pkts, task->pkt_template_orig, NULL, max_frame_size);
	task_gen_reset_pkt_templates(task);
	check_all_pkt_size(task, DO_PANIC);
	check_all_fields_in_bounds(task, DO_PANIC);
	task_gen_set_eth_ip_udp_sizes(task, task->orig_n_pkts, task->imix_nb_pkts, task->imix_pkt_sizes);
/* Create the mbuf mempool used by the generator, sized so that a single
 * mbuf can hold the largest frame to be generated. Fatal on failure. */
static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint16_t max_frame_size)
	/* NOTE(review): the pool name is a fixed static string — creating a
	 * second pool with the same name would fail in DPDK; confirm how
	 * multiple generator tasks are handled (name mutation not visible here). */
	static char name[] = "gen_pool";
	struct rte_mempool *ret;
	const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
	/* Grow the mbuf size beyond the default when jumbo-sized frames
	 * (plus mbuf header and headroom) would not fit. */
	uint32_t mbuf_size = TX_MBUF_SIZE;
	if (max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size)
		mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
	plog_info("\t\tCreating mempool with name '%s'\n", name);
	/* nb_mbuf - 1: mempool sizes of 2^n - 1 give best memory utilization. */
	ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size,
		targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
		rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
	PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
		sock_id, targ->nb_mbuf - 1);
	plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret,
		targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id);
1147 void task_gen_set_pkt_count(struct task_base *tbase, uint32_t count)
1149 struct task_gen *task = (struct task_gen *)tbase;
1151 task->pkt_count = count;
/* Runtime control hook: change the length of all packet templates.
 * Validates first and only then applies, so on error (non-zero return)
 * no template has been modified. */
int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
	struct task_gen *task = (struct task_gen *)tbase;
	/* Validation pass — last argument 0 (DO_NOT_PANIC): report instead
	 * of aborting, since this runs at runtime, not init. */
	for (size_t i = 0; i < task->n_pkts; ++i) {
		if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
		if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
	/* Apply pass: all templates take the new length. */
	for (size_t i = 0; i < task->n_pkts; ++i) {
		task->pkt_template[i].len = pkt_size;
/* Runtime control hook: install a new IMIX packet-size distribution.
 * Sizes are validated before new_imix_nb_pkts is published, so a failed
 * call leaves the active IMIX configuration unchanged. */
int task_gen_set_imix(struct task_base *tbase, uint32_t nb_pkt_sizes, uint32_t *pkt_sizes)
	struct task_gen *task = (struct task_gen *)tbase;
	memcpy(task->imix_pkt_sizes, pkt_sizes, nb_pkt_sizes * sizeof(uint32_t));
	for (size_t i = 0; i < nb_pkt_sizes; ++i) {
		if ((rc = check_pkt_size(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
		if ((rc = check_fields_in_bounds(task, pkt_sizes[i], DO_NOT_PANIC)) != 0)
	// only set new_imix_nb_pkts if checks of pkt sizes succeeded
	task->new_imix_nb_pkts = nb_pkt_sizes;
1188 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
1190 struct task_gen *task = (struct task_gen *)tbase;
1192 task->new_rate_bps = bps;
/* Runtime control hook: clear every configured random field (mask,
 * fixed bits and offset) so no random bytes are written anymore. */
void task_gen_reset_randoms(struct task_base *tbase)
	struct task_gen *task = (struct task_gen *)tbase;
	for (uint32_t i = 0; i < task->n_rands; ++i) {
		task->rand[i].rand_mask = 0;
		task->rand[i].fixed_bits = 0;
		task->rand[i].rand_offset = 0;
/* Runtime control hook: write 'len' bytes of 'value' (big-endian) at
 * byte 'offset' into every packet template, then recompute derived
 * template data. Non-zero return when offset+len exceeds the frame. */
int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
	struct task_gen *task = (struct task_gen *)tbase;
	if (offset + len > task->max_frame_size)
	for (size_t i = 0; i < task->n_pkts; ++i) {
		/* Keep the 'len' most significant bytes of the big-endian value.
		 * NOTE(review): assumes len <= 4 — a larger len would make the
		 * shift count negative/oversized (UB); confirm callers bound it. */
		uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
		uint8_t *dst = task->pkt_template[i].buf;
		rte_memcpy(dst + offset, &to_write, len);
	/* Checksums/metadata may now be stale — recompute them all. */
	task_gen_pkt_template_recalc_all(task);
1225 void task_gen_reset_values(struct task_base *tbase)
1227 struct task_gen *task = (struct task_gen *)tbase;
1229 task_gen_reset_pkt_templates_content(task);
1230 if (task->flags & TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC) {
1231 for (uint32_t i = 0; i < task->n_pkts; ++i) {
1232 rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
1237 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
1239 struct task_gen *task = (struct task_gen *)tbase;
1241 return task->n_rands;
/* Init for the "gen pcap" sub-mode: load packets and their timestamps
 * from the pcap file; packets are later replayed with the recorded
 * inter-packet gaps. Fatal on any error. */
static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
	struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
	task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
	uint32_t max_frame_size;
	task->loop = targ->loop;
	task->hz = rte_get_tsc_hz();
	char err[PCAP_ERRBUF_SIZE];
	pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
	PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
	/* First pass: count packets and find the largest frame. */
	task->n_pkts = pcap_count_pkts(handle, &max_frame_size);
	plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
	task->local_mbuf.mempool = task_gen_create_mempool(targ, max_frame_size);
	PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
	plogx_info("Configured to load %u packets\n", targ->n_pkts);
	/* Clamp to the configured packet count. */
	if (task->n_pkts > targ->n_pkts)
		task->n_pkts = targ->n_pkts;
	plogx_info("Loading %u packets from pcap\n", task->n_pkts);
	/* One contiguous allocation: template array followed by the
	 * per-packet tsc array. */
	size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
	uint8_t *mem = prox_zmalloc(mem_size, task->socket_id);
	PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
	task->proto = (struct pkt_template *) mem;
	task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
	/* NOTE(review): loop bound is targ->n_pkts although only task->n_pkts
	 * (clamped above) templates exist/are read — looks like it should be
	 * task->n_pkts; as written it over-allocates and writes proto[i] past
	 * the template array when targ->n_pkts > task->n_pkts. Confirm. */
	for (uint i = 0; i < targ->n_pkts; i++) {
		task->proto[i].buf = prox_zmalloc(max_frame_size, task->socket_id);
		PROX_PANIC(task->proto[i].buf == NULL, "Failed to allocate %u bytes (in huge pages) for pcap file\n", max_frame_size);
	/* Second pass: read packets and their timestamps. */
	pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc, max_frame_size);
/* Look up an already-configured random by its byte offset; callers
 * compare the result against UINT32_MAX to detect "not found" (see
 * task_gen_add_rand). */
static int task_gen_find_random_with_offset(struct task_gen *task, uint32_t offset)
	for (uint32_t i = 0; i < task->n_rands; ++i) {
		if (task->rand[i].rand_offset == offset) {
/* Runtime control hook: add (or overwrite) a random field definition.
 * rand_str encodes the mask/fixed-bit pattern and field length; offset
 * is the byte position in the packet. Returns non-zero on error. */
int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t offset, uint32_t rand_id)
	struct task_gen *task = (struct task_gen *)tbase;
	uint32_t existing_rand;
	/* Hard limit of 64 random fields when no explicit slot is given. */
	if (rand_id == UINT32_MAX && task->n_rands == 64) {
		plog_err("Too many randoms\n");
	uint32_t mask, fixed, len;
	if (parse_random_str(&mask, &fixed, &len, rand_str)) {
		plog_err("%s\n", get_parse_err());
	/* Random bytes invalidate any precomputed checksum. */
	task->runtime_checksum_needed = 1;
	/* An existing random at the same offset is overwritten in place. */
	existing_rand = task_gen_find_random_with_offset(task, offset);
	if (existing_rand != UINT32_MAX) {
		plog_warn("Random at offset %d already set => overwriting len = %d %s\n", offset, len, rand_str);
		rand_id = existing_rand;
		task->rand[rand_id].rand_len = len;
		task->rand[rand_id].rand_offset = offset;
		task->rand[rand_id].rand_mask = mask;
		task->rand[rand_id].fixed_bits = fixed;
	/* Otherwise append a new entry at the end of the table. */
	task->rand[task->n_rands].rand_len = len;
	task->rand[task->n_rands].rand_offset = offset;
	task->rand[task->n_rands].rand_mask = mask;
	task->rand[task->n_rands].fixed_bits = fixed;
/* Task start hook for the gen mode: reset transmit state and, when an
 * L3 master task is configured, register this generator's IPs with the
 * control plane. */
static void start(struct task_base *tbase)
	struct task_gen *task = (struct task_gen *)tbase;
	task->pkt_queue_index = 0;
	task_gen_reset_token_time(task);
	if (tbase->l3.tmaster) {
		register_all_ip_to_ctrl_plane(task);
	/* Known limitation (original note):
	Handle the case when two tasks transmit to the same port
	and one of them is stopped. In that case ARP (requests or replies)
	might not be sent. Master will have to keep a list of rings.
	stop will have to de-register IP from ctrl plane.
	un-registration will remove the ring. when having more than
	one active rings, master can always use the first one
/* Task start hook for the pcap sub-mode: prime the replay clock. */
static void start_pcap(struct task_base *tbase)
	struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
	/* When we start, the first packet is sent immediately. */
	task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
1364 static void init_task_gen_early(struct task_args *targ)
1366 uint8_t *generator_count = prox_sh_find_system("generator_count");
1368 if (generator_count == NULL) {
1369 generator_count = prox_zmalloc(sizeof(*generator_count), rte_lcore_to_socket_id(targ->lconf->id));
1370 PROX_PANIC(generator_count == NULL, "Failed to allocate generator count\n");
1371 prox_sh_add_system("generator_count", generator_count);
1373 targ->generator_id = *generator_count;
1374 (*generator_count)++;
/* Init for the gen mode (and its l3 sub-mode): size the mempool, load
 * the packet templates (inline or pcap), configure rate/latency/random
 * parameters and precompute the bytes-to-tsc table. Fatal on any
 * configuration error. */
static void init_task_gen(struct task_base *tbase, struct task_args *targ)
	struct task_gen *task = (struct task_gen *)tbase;
	task->socket_id = rte_lcore_to_socket_id(targ->lconf->id);
	task->packet_id_pos = targ->packet_id_pos;
	struct prox_port_cfg *port = find_reachable_port(targ);
	// TODO: check that all reachable ports have the same mtu...
	/* NOTE(review): the port != NULL / == NULL branch delimiters are not
	 * visible in this view — the two max_frame_size assignments below are
	 * presumably the two branches; confirm against the full file. */
	task->cksum_offload = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
	/* Frame budget: MTU plus Ethernet header plus room for two VLAN tags. */
	task->max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + 2 * PROX_VLAN_TAG_SIZE;
	// Not generating to any port...
	task->max_frame_size = PROX_RTE_ETHER_MAX_LEN;
	task->local_mbuf.mempool = task_gen_create_mempool(targ, task->max_frame_size);
	PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
	task->hz = rte_get_tsc_hz();
	/* Byte offsets inside generated packets for latency/accuracy/signature
	 * stamping, as configured. */
	task->lat_pos = targ->lat_pos;
	task->accur_pos = targ->accur_pos;
	task->sig_pos = targ->sig_pos;
	task->sig = targ->sig;
	task->new_rate_bps = targ->rate_bps;
	/*
	 * For tokens, use 10 Gbps as base rate
	 * Scripts can then use speed command, with speed=100 as 10 Gbps and speed=400 as 40 Gbps
	 * Script can query prox "port info" command to find out the port link speed to know
	 * at which rate to start. Note that virtio running on OVS returns 10 Gbps, so a script has
	 * probably also to check the driver (as returned by the same "port info" command.
	 */
	struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
	token_time_init(&task->token_time, &tt_cfg);
	init_task_gen_seeds(task);
	/* Clamp bulk sizes into a sane [1, 64] range. */
	task->min_bulk_size = targ->min_bulk_size;
	task->max_bulk_size = targ->max_bulk_size;
	if (task->min_bulk_size < 1)
		task->min_bulk_size = 1;
	if (task->max_bulk_size < 1)
		task->max_bulk_size = 64;
	PROX_PANIC(task->max_bulk_size > 64, "max_bulk_size higher than 64\n");
	PROX_PANIC(task->max_bulk_size < task->min_bulk_size, "max_bulk_size must be > than min_bulk_size\n");
	/* -1 == unlimited packet count. */
	task->pkt_count = -1;
	task->lat_enabled = targ->lat_enabled;
	task->runtime_flags = targ->runtime_flags;
	PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat not enabled by lat pos or accur pos configured\n");
	task->generator_id = targ->generator_id;
	plog_info("\tGenerator id = %d\n", task->generator_id);
	// Allocate array holding bytes to tsc for supported frame sizes
	task->bytes_to_tsc = prox_zmalloc(task->max_frame_size * MAX_PKT_BURST * sizeof(task->bytes_to_tsc[0]), task->socket_id);
	PROX_PANIC(task->bytes_to_tsc == NULL,
		"Failed to allocate %u bytes (in huge pages) for bytes_to_tsc\n", task->max_frame_size);
	// task->port->max_link_speed reports the maximum, non negotiated ink speed in Mbps e.g. 40k for a 40 Gbps NIC.
	// It can be UINT32_MAX (virtual devices or not supported by DPDK < 16.04)
	uint64_t bytes_per_hz = UINT64_MAX;
	if ((task->port) && (task->port->max_link_speed != UINT32_MAX)) {
		/* Mbps -> bytes/second: * 1e6 / 8 == * 125000. */
		bytes_per_hz = task->port->max_link_speed * 125000L;
		plog_info("\tPort %u: max link speed is %ld Mbps\n",
			(uint8_t)(task->port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
	// There are cases where hz estimate might be slighly over-estimated
	// This results in too much extrapolation
	// Only account for 99% of extrapolation to handle cases with up to 1% error clocks
	for (unsigned int i = 0; i < task->max_frame_size * MAX_PKT_BURST ; i++) {
		if (bytes_per_hz == UINT64_MAX)
			task->bytes_to_tsc[i] = 0;
		task->bytes_to_tsc[i] = (task->hz * i * 0.99) / bytes_per_hz;
	/* Copy the IMIX size table from the static config. */
	task->imix_nb_pkts = targ->imix_nb_pkts;
	for (uint32_t i = 0; i < targ->imix_nb_pkts; i++) {
		task->imix_pkt_sizes[i] = targ->imix_pkt_sizes[i];
	if (!strcmp(targ->pcap_file, "")) {
		plog_info("\tUsing inline definition of a packet\n");
		task_init_gen_load_pkt_inline(task, targ);
	plog_info("Loading from pcap %s\n", targ->pcap_file);
	task_init_gen_load_pcap(task, targ);
	PROX_PANIC(((targ->nb_txrings == 0) && (targ->nb_txports == 0)), "Gen mode requires a tx ring or a tx port");
	if ((targ->flags & DSF_KEEP_SRC_MAC) == 0) {
		/* Stamp the TX port's MAC as source MAC in every template
		 * (source MAC field starts right after the destination MAC). */
		task->flags |= TASK_OVERWRITE_SRC_MAC_WITH_PORT_MAC;
		memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(prox_rte_ether_addr));
		for (uint32_t i = 0; i < task->n_pkts; ++i) {
			rte_memcpy(&task->pkt_template[i].buf[sizeof(prox_rte_ether_addr)], &task->src_mac, sizeof(prox_rte_ether_addr));
	/* Install all statically configured randoms; UINT32_MAX == no
	 * preferred slot. */
	for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
		PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
			"Failed to add random\n");
/* Registration descriptor for the default "gen" mode. */
static struct task_init task_init_gen = {
	.init = init_task_gen,
	.handle = handle_gen_bulk,
	.early_init = init_task_gen_early,
	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	/* NOTE(review): second flag_features initializer — presumably the
	 * #else branch of a SOFT_CRC compile-time conditional (the directives
	 * are not visible in this view); confirm against the full file. */
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen)
/* Registration descriptor for the "gen l3" sub-mode (same init/handle
 * as plain gen; L3 behavior selected via the sub-mode string). */
static struct task_init task_init_gen_l3 = {
	.sub_mode_str = "l3",
	.init = init_task_gen,
	.handle = handle_gen_bulk,
	.early_init = init_task_gen_early,
	// For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
	// vector mode is used by DPDK, resulting (theoretically) in higher performance.
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	/* NOTE(review): second flag_features initializer — presumably the
	 * #else branch of a SOFT_CRC compile-time conditional; confirm. */
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen)
/* This mode uses time stamps in the pcap file */
static struct task_init task_init_gen_pcap = {
	.sub_mode_str = "pcap",
	.init = init_task_gen_pcap,
	.handle = handle_gen_pcap_bulk,
	.start = start_pcap,
	.early_init = init_task_gen_early,
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
	/* NOTE(review): second flag_features initializer — presumably the
	 * #else branch of a compile-time conditional; confirm. */
	.flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
	.size = sizeof(struct task_gen_pcap)
1531 __attribute__((constructor)) static void reg_task_gen(void)
1533 reg_task(&task_init_gen);
1534 reg_task(&task_init_gen_l3);
1535 reg_task(&task_init_gen_pcap);