2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
21 #include <rte_cycles.h>
22 #include <rte_version.h>
23 #include <rte_byteorder.h>
24 #include <rte_ether.h>
25 #include <rte_hash_crc.h>
27 #include "prox_shared.h"
29 #include "prox_malloc.h"
30 #include "handle_gen.h"
31 #include "handle_lat.h"
32 #include "task_init.h"
33 #include "task_base.h"
34 #include "prox_port_cfg.h"
39 #include "mbuf_utils.h"
41 #include "prox_cksum.h"
43 #include "prox_assert.h"
45 #include "token_time.h"
46 #include "local_mbuf.h"
49 #include "handle_master.h"
/* Scratch buffer sized for one maximum-length Ethernet frame. */
55 uint8_t buf[ETHER_MAX_LEN];
/* Upper bound on the number of packet templates. Power of two so the
   mask below can extract the template index from mbuf udata64. */
58 #define MAX_TEMPLATE_INDEX 65536
59 #define TEMPLATE_INDEX_MASK (MAX_TEMPLATE_INDEX - 1)
/* Flag bit (bit 16) stored in mbuf->udata64 to mark ARP packets. */
60 #define MBUF_ARP MAX_TEMPLATE_INDEX
/* Expand a host-order IPv4 address into four comma-separated byte values.
   NOTE(review): the argument is not parenthesized in the first and last
   terms — pass a plain identifier, not an arbitrary expression. */
62 #define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
/* Initialize an mbuf from a packet template: set both pkt_len and
   data_len to the template length and copy the template bytes into the
   mbuf data area (pkt must point into mbuf's buffer). */
64 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
66 const uint32_t pkt_size = pkt_template->len;
68 rte_pktmbuf_pkt_len(mbuf) = pkt_size;
69 rte_pktmbuf_data_len(mbuf) = pkt_size;
71 rte_memcpy(pkt, pkt_template->buf, pkt_template->len);
/* Per-task state for pcap replay mode ("gen pcap").
   NOTE(review): this definition appears truncated in this view — more
   fields (loop, hz, last_tsc, n_pkts, pkt_idx, proto_tsc) are referenced
   by the handlers below but are not visible here. */
74 struct task_gen_pcap {
75 struct task_base base;
77 struct local_mbuf local_mbuf;
79 struct pkt_template *proto;
/* NOTE(review): interior of struct task_gen — the opening line of the
   definition is not visible in this view. Fields below are the per-task
   state of the packet generator. */
87 struct task_base base;
90 struct token_time token_time;
91 struct local_mbuf local_mbuf;
92 struct pkt_template *pkt_template; /* packet templates used at runtime */
93 uint64_t write_duration_estimate; /* how long it took previously to write the time stamps in the packets */
94 uint64_t earliest_tsc_next_pkt;
95 uint64_t new_rate_bps;
96 uint64_t pkt_queue_index;
97 uint32_t n_pkts; /* number of packets in pcap */
98 uint32_t pkt_idx; /* current packet from pcap */
99 uint32_t pkt_count; /* how many packets to generate */
100 uint32_t runtime_flags;
102 uint16_t packet_id_pos;
106 uint8_t generator_id;
107 uint8_t n_rands; /* number of randoms */
108 uint8_t min_bulk_size;
109 uint8_t max_bulk_size;
111 uint8_t runtime_checksum_needed;
114 uint32_t rand_mask; /* since the random vals are uniform, masks don't introduce bias */
115 uint32_t fixed_bits; /* length of each random (max len = 4) */
116 uint16_t rand_offset; /* each random has an offset*/
117 uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
120 uint64_t pkt_tsc_offset[64]; /* per-packet tx-time offsets for one bulk */
121 struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
122 struct ether_addr src_mac;
124 uint8_t cksum_offload;
125 } __rte_cache_aligned;
/* Return the IPv4 header length in bytes, derived from the IHL field.
   Warns (and, presumably, returns 0 on a line not visible here — TODO
   confirm) when the version nibble is not 4. */
127 static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
129 /* Optimize for common case of IPv4 header without options. */
130 if (ip->version_ihl == 0x45)
131 return sizeof(struct ipv4_hdr);
132 if (unlikely(ip->version_ihl >> 4 != 4)) {
133 plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
136 return (ip->version_ihl & 0xF) * 4;
/* Compute the L2 and L3 header lengths of a raw packet. Walks any
   stacked 802.1ad/VLAN tags (bounded by the packet length) to find the
   real EtherType, then derives the IPv4 header length. */
139 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
141 *l2_len = sizeof(struct ether_hdr);
143 struct vlan_hdr *vlan_hdr;
144 struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
146 uint16_t ether_type = eth_hdr->ether_type;
/* Skip QinQ/VLAN tags; each tag adds sizeof(struct vlan_hdr) to l2_len. */
149 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(struct vlan_hdr) < len)) {
150 vlan_hdr = (struct vlan_hdr *)(pkt + *l2_len);
152 ether_type = vlan_hdr->eth_proto;
155 // No L3 cksum offload for IPv6, but TODO L4 offload
156 // ETYPE_EoGRE CRC not implemented yet
158 switch (ether_type) {
172 plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
177 struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + *l2_len);
178 *l3_len = ipv4_get_hdr_len(ip);
/* Recompute IP (and, via prox_ip_udp_cksum, L4) checksums of one packet
   using the precomputed l2/l3 lengths from its template. cksum_offload
   selects hardware offload vs software calculation. */
182 static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_template *pkt_template, int cksum_offload)
184 uint16_t l2_len = pkt_template->l2_len;
185 uint16_t l3_len = pkt_template->l3_len;
188 struct ipv4_hdr *ip = (struct ipv4_hdr*)(hdr + l2_len);
189 prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
/* Re-arm the token bucket with the currently requested rate and reset
   its time base to "now". */
193 static void task_gen_reset_token_time(struct task_gen *task)
195 token_time_set_bpp(&task->token_time, task->new_rate_bps);
196 token_time_reset(&task->token_time, rte_rdtsc(), 0);
/* Consume send_bulk packets from the remaining packet budget.
   A pkt_count of (uint32_t)-1 means "unlimited" and is left untouched. */
199 static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk)
201 if (task->pkt_count == (uint32_t)-1)
204 if (task->pkt_count >= send_bulk)
205 task->pkt_count -= send_bulk;
/* Main handler for pcap replay: determine how many packets are due
   (based on recorded inter-packet gaps vs the TSC), take that many mbufs
   from the local pool, fill them from the templates and transmit.
   NOTE(review): several lines of this function are not visible in this
   view (loop bookkeeping, send_bulk accumulation). */
211 static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf, uint16_t n_pkts)
213 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
214 uint64_t now = rte_rdtsc();
215 uint64_t send_bulk = 0;
216 uint32_t pkt_idx_tmp = task->pkt_idx;
218 if (pkt_idx_tmp == task->n_pkts) {
/* Reaching the end of the pcap is only valid when looping is enabled. */
219 PROX_ASSERT(task->loop);
/* Count how many packets are already due, capped at a bulk of 64. */
223 for (uint16_t j = 0; j < 64; ++j) {
224 uint64_t tsc = task->proto_tsc[pkt_idx_tmp];
225 if (task->last_tsc + tsc <= now) {
226 task->last_tsc += tsc;
229 if (pkt_idx_tmp == task->n_pkts) {
240 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
241 if (new_pkts == NULL)
/* Copy one template per due packet into the freshly taken mbufs. */
244 for (uint16_t j = 0; j < send_bulk; ++j) {
245 struct rte_mbuf *next_pkt = new_pkts[j];
246 struct pkt_template *pkt_template = &task->proto[task->pkt_idx];
247 uint8_t *hdr = rte_pktmbuf_mtod(next_pkt, uint8_t *);
249 pkt_template_init_mbuf(pkt_template, next_pkt, hdr);
252 if (task->pkt_idx == task->n_pkts) {
260 return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
/* Convert a byte count into TSC ticks at the configured link speed.
   A link speed of UINT64_MAX presumably means "infinitely fast" — the
   early-return value is on a line not visible here; TODO confirm. */
263 static uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
265 const uint64_t hz = task->hz;
266 const uint64_t bytes_per_hz = task->link_speed;
268 if (bytes_per_hz == UINT64_MAX)
271 return hz * bytes / bytes_per_hz;
/* Next template index, wrapping to 0 after the last template. */
274 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
276 return pkt_idx + 1 == task->n_pkts? 0 : pkt_idx + 1;
/* Template index at a (possibly negative, two's-complement) offset from
   the current pkt_idx, modulo the number of templates. */
279 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
281 return (task->pkt_idx + offset) % task->n_pkts;
/* Decide how many packets to send in the next bulk, limited by
   max_bulk_size, the remaining pkt_count and the token-bucket budget.
   Returns the bulk size and stores the wire bytes it represents in
   *total_bytes; a result below min_bulk_size means "send nothing". */
284 static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *total_bytes)
286 /* The biggest bulk we allow to send is task->max_bulk_size
287 packets. The max bulk size can also be limited by the
288 pkt_count field. At the same time, we are rate limiting
289 based on the specified speed (in bytes per second) so token
290 bucket based rate limiting must also be applied. The
291 minimum bulk size is also constrained. If the calculated
292 bulk size is less then the minimum, then don't send
295 const uint32_t min_bulk = task->min_bulk_size;
296 uint32_t max_bulk = task->max_bulk_size;
298 if (task->pkt_count != (uint32_t)-1 && task->pkt_count < max_bulk) {
299 max_bulk = task->pkt_count;
302 uint32_t send_bulk = 0;
303 uint32_t pkt_idx_tmp = task->pkt_idx;
304 uint32_t would_send_bytes = 0;
308 * TODO - this must be improved to take into account the fact that, after applying randoms
309 * The packet can be replaced by an ARP
311 for (uint16_t j = 0; j < max_bulk; ++j) {
312 struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp];
313 pkt_size = pktpl->len;
314 uint32_t pkt_len = pkt_len_to_wire_size(pkt_size);
/* Stop as soon as the token bucket cannot cover the next packet. */
315 if (pkt_len + would_send_bytes > task->token_time.bytes_now)
318 pkt_idx_tmp = task_gen_next_pkt_idx(task, pkt_idx_tmp);
321 would_send_bytes += pkt_len;
324 if (send_bulk < min_bulk)
326 *total_bytes = would_send_bytes;
/* Overwrite each configured random field of one packet: draw a uniform
   32-bit value, apply the field's mask and fixed bits, byte-swap to BE
   and copy the rand_len low-order bytes to the field's offset. */
330 static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr)
332 uint32_t ret, ret_tmp;
334 for (uint16_t i = 0; i < task->n_rands; ++i) {
335 ret = random_next(&task->rand[i].state);
336 ret_tmp = (ret & task->rand[i].rand_mask) | task->rand[i].fixed_bits;
338 ret_tmp = rte_bswap32(ret_tmp);
339 /* At this point, the lower order bytes (BE) contain
340 the generated value. The address where the values
341 of interest starts is at ret_tmp + 4 - rand_len. */
342 uint8_t *pret_tmp = (uint8_t*)&ret_tmp;
343 rte_memcpy(hdr + task->rand[i].rand_offset, pret_tmp + 4 - task->rand[i].rand_len, task->rand[i].rand_len);
/* Apply the random fields to every packet of the bulk. */
347 static void task_gen_apply_all_random_fields(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
352 for (uint16_t i = 0; i < count; ++i)
353 task_gen_apply_random_fields(task, pkt_hdr[i]);
/* Write the 32-bit accuracy value at the configured offset. */
356 static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
358 *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
/* Write the generator signature at the configured offset. */
361 static void task_gen_apply_sig(struct task_gen *task, uint8_t *pkt_hdr)
363 *(uint32_t *)(pkt_hdr + task->sig_pos) = task->sig;
/* Stamp the accuracy of 64-packets-ago into each non-ARP packet;
   accur_pos == 0 disables the feature. */
366 static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
368 if (!task->accur_pos)
371 /* The accuracy of task->pkt_queue_index - 64 is stored in
372 packet task->pkt_queue_index. The ID modulo 64 is the
374 for (uint16_t j = 0; j < count; ++j) {
375 if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
376 uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & 63];
377 task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
/* Stamp the signature into every non-ARP packet of the bulk. */
382 static void task_gen_apply_all_sig(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
387 for (uint16_t j = 0; j < count; ++j) {
388 if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
389 task_gen_apply_sig(task, pkt_hdr[j]);
/* Copy a unique id into the packet at packet_id_pos. */
394 static void task_gen_apply_unique_id(struct task_gen *task, uint8_t *pkt_hdr, const struct unique_id *id)
396 struct unique_id *dst = (struct unique_id *)(pkt_hdr + task->packet_id_pos);
/* Give every non-ARP packet a fresh (generator_id, sequence) unique id;
   packet_id_pos == 0 disables the feature. */
401 static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
403 if (!task->packet_id_pos)
406 for (uint16_t i = 0; i < count; ++i) {
407 if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
409 unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
410 task_gen_apply_unique_id(task, pkt_hdr[i], &id);
/* Recompute checksums of the bulk's non-ARP packets, but only when the
   task is configured to emit CRCs and a runtime field (latency stamp,
   unique id, accuracy, randoms) actually dirtied the L3/L4 payload.
   The template index is recovered by walking count steps back from the
   (already advanced) task->pkt_idx. */
415 static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
417 if (!(task->runtime_flags & TASK_TX_CRC))
420 if (!task->runtime_checksum_needed)
423 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
424 for (uint16_t i = 0; i < count; ++i) {
425 if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
426 struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
427 checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
428 pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
/* Remove the bytes just sent from the token bucket. When a full
   max-size burst was sent the generator cannot keep up, so the balance
   is clamped to avoid accumulating an unbounded backlog. */
433 static void task_gen_consume_tokens(struct task_gen *task, uint32_t tokens, uint32_t send_count)
435 /* If max burst has been sent, we can't keep up so just assume
436 that we can (leaving a "gap" in the packet stream on the
438 task->token_time.bytes_now -= tokens;
439 if (send_count == task->max_bulk_size && task->token_time.bytes_now > tokens) {
440 task->token_time.bytes_now = tokens;
/* Duration (in TSC ticks) of the whole bulk on the wire: tx-time offset
   of the last packet plus that packet's own transmit time. */
444 static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t count)
446 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
447 struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
448 uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
449 uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
450 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
452 return bulk_duration;
/* Stamp each packet of the bulk with its estimated transmit time (for
   latency measurement on the receive side), then spin until that
   estimated time is reached so the stamps are never in the future.
   Returns the TSC read just before transmission, or (on a line not
   visible here, presumably) 0 when latency is disabled — TODO confirm. */
455 static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
457 if (!task->lat_enabled)
460 uint64_t tx_tsc, delta_t;
461 uint64_t tsc_before_tx = 0;
463 /* Just before sending the packets, apply the time stamp
464 relative to when the first packet will be sent. The first
465 packet will be sent now. The time is read for each packet
466 to reduce the error towards the actual time the packet will
468 uint64_t write_tsc_after, write_tsc_before;
470 write_tsc_before = rte_rdtsc();
472 /* The time it took previously to write the time stamps in the
473 packets is used as an estimate for how long it will take to
474 write the time stamps now. The estimated time at which the
475 packets will actually be sent will be at tx_tsc. */
476 tx_tsc = write_tsc_before + task->write_duration_estimate;
478 /* The offset delta_t tracks the difference between the actual
479 time and the time written in the packets. Adding the offset
480 to the actual time insures that the time written in the
481 packets is monotonically increasing. At the same time,
482 simply sleeping until delta_t is zero would leave a period
483 of silence on the line. The error has been introduced
484 earlier, but the packets have already been sent. */
485 if (tx_tsc < task->earliest_tsc_next_pkt)
486 delta_t = task->earliest_tsc_next_pkt - tx_tsc;
/* Write each packet's stamp: bulk start + its per-packet offset. */
490 for (uint16_t i = 0; i < count; ++i) {
491 uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
492 const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
494 *pos = pkt_tsc >> LATENCY_ACCURACY;
497 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
499 task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
500 write_tsc_after = rte_rdtsc();
/* Feed the measured write time back as the next estimate. */
501 task->write_duration_estimate = write_tsc_after - write_tsc_before;
503 /* Make sure that the time stamps that were written
504 are valid. The offset must be taken into account */
506 tsc_before_tx = rte_rdtsc();
507 } while (tsc_before_tx < tx_tsc);
508 return tsc_before_tx;
/* Record how late the actual transmission was versus the stamped time,
   into the 64-slot ring indexed by packet sequence number; a later bulk
   writes this value into the corresponding packets (see
   task_gen_apply_all_accur_pos). Disabled when accur_pos == 0. */
511 static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint64_t tsc_before_tx)
513 if (!task->accur_pos)
516 uint64_t accur = rte_rdtsc() - tsc_before_tx;
517 uint64_t first_accuracy_idx = task->pkt_queue_index - count;
519 for (uint32_t i = 0; i < count; ++i) {
520 uint32_t accuracy_idx = (first_accuracy_idx + i) & 63;
522 task->accur[accuracy_idx] = accur;
/* Prefetch mbuf metadata, resolve data pointers, then prefetch the
   packet headers themselves, to hide memory latency for the bulk. */
526 static void task_gen_load_and_prefetch(struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
528 for (uint16_t i = 0; i < count; ++i)
529 rte_prefetch0(mbufs[i]);
530 for (uint16_t i = 0; i < count; ++i)
531 pkt_hdr[i] = rte_pktmbuf_mtod(mbufs[i], uint8_t *);
532 for (uint16_t i = 0; i < count; ++i)
533 rte_prefetch0(pkt_hdr[i]);
536 static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
538 uint64_t will_send_bytes = 0;
540 for (uint16_t i = 0; i < count; ++i) {
541 struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
542 struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
543 pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
544 mbufs[i]->udata64 = task->pkt_idx & TEMPLATE_INDEX_MASK;
545 struct ether_hdr *hdr = (struct ether_hdr *)pkt_hdr[i];
546 if (task->lat_enabled) {
547 task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
548 will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
550 task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
/* Apply a pending rate change: reset the token bucket only when the
   requested bits-per-period differs from the current configuration. */
554 static void task_gen_update_config(struct task_gen *task)
556 if (task->token_time.cfg.bpp != task->new_rate_bps)
557 task_gen_reset_token_time(task);
/* Recursively enumerate every IP address reachable through a random
   field (one bit of the mask per recursion level) and register each
   resulting address with the master/control-plane task. */
560 static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
562 struct task_base *tbase = (struct task_base *)task;
564 build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits);
566 build_value(task, mask >> 1, bit_pos + 1, val | (1 << bit_pos), fixed_bits);
569 register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
/* For every packet template, locate the IPv4 source address (walking
   VLAN/MPLS encapsulation) and register it with the control plane; when
   a configured random field overlaps the source address, enumerate and
   register every address that random can generate (via build_value). */
572 static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
574 struct task_base *tbase = (struct task_base *)task;
579 for (uint32_t i = 0; i < task->n_pkts; ++i) {
580 struct pkt_template *pktpl = &task->pkt_template[i];
581 unsigned int ip_src_pos = 0;
583 unsigned int l2_len = sizeof(struct ether_hdr);
585 uint8_t *pkt = pktpl->buf;
586 struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
587 uint16_t ether_type = eth_hdr->ether_type;
588 struct vlan_hdr *vlan_hdr;
/* Skip stacked QinQ/VLAN tags to reach the real EtherType. */
591 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(struct vlan_hdr) < pktpl->len)) {
592 vlan_hdr = (struct vlan_hdr *)(pkt + l2_len);
594 ether_type = vlan_hdr->eth_proto;
596 if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
600 if ((ether_type != ETYPE_IPv4) && !maybe_ipv4)
603 struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + l2_len);
604 PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
606 // Even if IPv4 header contains options, options are after ip src and dst
607 ip_src_pos = l2_len + sizeof(struct ipv4_hdr) - 2 * sizeof(uint32_t);
608 uint32_t *ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
609 plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
610 register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
/* Handle randoms overlapping the source address field. */
612 for (int j = 0; j < task->n_rands; j++) {
613 offset = task->rand[j].rand_offset;
614 len = task->rand[j].rand_len;
615 mask = task->rand[j].rand_mask;
616 fixed = task->rand[j].fixed_bits;
617 plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
618 if ((offset < ip_src_pos + 4) && (offset + len >= ip_src_pos)) {
619 if (offset >= ip_src_pos) {
/* Random starts inside the IP field: restrict mask to the overlap. */
620 int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1;
621 mask = mask & ip_src_mask;
622 fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask);
623 build_value(task, mask, 0, 0, fixed);
/* Random ends inside the IP field: shift into position. */
625 int32_t bits = ((ip_src_pos + 4 - offset - len) * 8);
627 fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
628 build_value(task, mask, 0, 0, fixed);
/* Main transmit handler of the generator task: refresh config and the
   token bucket, compute the bulk size allowed by the rate limiter, take
   mbufs from the local pool, build/stamp/checksum the packets and
   transmit them. NOTE(review): several lines of this function are not
   visible in this view (early returns, out handling). */
635 static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
637 struct task_gen *task = (struct task_gen *)tbase;
638 uint8_t out[MAX_PKT_BURST] = {0};
643 task_gen_update_config(task);
/* Budget exhausted: keep the token bucket fresh so a later "start"
   does not burst. */
645 if (task->pkt_count == 0) {
646 task_gen_reset_token_time(task);
649 if (!task->token_time.cfg.bpp)
652 token_time_update(&task->token_time, rte_rdtsc());
654 uint32_t would_send_bytes;
655 uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
659 task_gen_take_count(task, send_bulk);
660 task_gen_consume_tokens(task, would_send_bytes, send_bulk);
662 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
663 if (new_pkts == NULL)
665 uint8_t *pkt_hdr[MAX_RING_BURST];
667 task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
668 task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
669 task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
670 task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
671 task_gen_apply_all_sig(task, new_pkts, pkt_hdr, send_bulk);
672 task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
674 uint64_t tsc_before_tx;
676 tsc_before_tx = task_gen_write_latency(task, pkt_hdr, send_bulk);
/* Checksums last: every runtime field has been written by now. */
677 task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
678 ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
679 task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
/* Seed the PRNG state of every random-field slot. */
683 static void init_task_gen_seeds(struct task_gen *task)
685 for (size_t i = 0; i < sizeof(task->rand)/sizeof(task->rand[0]); ++i)
686 random_init_seed(&task->rand[i].state);
/* Count the packets in an open pcap handle by reading to EOF, then
   rewind the underlying file to the first packet. */
689 static uint32_t pcap_count_pkts(pcap_t *handle)
691 struct pcap_pkthdr header;
694 long pkt1_fpos = ftell(pcap_file(handle));
696 while ((buf = pcap_next(handle, &header))) {
699 int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
700 PROX_PANIC(ret2 != 0, "Failed to reset reading pcap file\n");
/* Rounded-to-nearest integer average of n timestamps. */
static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
{
	uint64_t sum = 0;

	for (uint32_t idx = 0; idx != n; ++idx)
		sum += time_stamp[idx];
	/* Adding n/2 before dividing rounds to the nearest integer. */
	return (sum + n / 2) / n;
}
/* Read n_pkts packets from an open pcap into the template array; when
   time_stamp is non-NULL also convert each capture time into TSC ticks
   and post-process the array into inter-packet gaps. Packets longer
   than the template buffer are truncated with a warning. */
713 static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp)
715 struct pcap_pkthdr header;
719 for (uint32_t i = 0; i < n_pkts; ++i) {
720 buf = pcap_next(handle, &header);
722 PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
723 proto[i].len = header.len;
724 len = RTE_MIN(header.len, sizeof(proto[i].buf));
725 if (header.len > len)
726 plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
/* Capture times are made relative to the first packet's time. */
729 static struct timeval beg;
735 tv = tv_diff(&beg, &header.ts);
736 tv_to_tsc(&tv, time_stamp + i);
738 rte_memcpy(proto[i].buf, buf, len);
/* Convert absolute times to deltas between consecutive packets. */
741 if (time_stamp && n_pkts) {
742 for (uint32_t i = n_pkts - 1; i > 0; --i)
743 time_stamp[i] -= time_stamp[i - 1];
744 /* Since the handle function will loop the packets,
745 there is one time-stamp that is not provided by the
746 pcap file. This is the time between the last and
747 the first packet. This implementation takes the
748 average of the inter-packet times here. */
750 time_stamp[0] = avg_time_stamp(time_stamp + 1, n_pkts - 1);
/* Validate one packet size against [Ether+IPv4 header .. max frame - CRC].
   With do_panic set, violations abort via PROX_PANIC; otherwise they are
   logged and a non-zero return is presumably produced on lines not
   visible here — TODO confirm. */
756 static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
758 const uint16_t min_len = sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr);
759 const uint16_t max_len = ETHER_MAX_LEN - 4;
762 PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
763 PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
764 PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
768 plog_err("Invalid packet size length (no packet defined?)\n");
771 if (pkt_size > max_len) {
772 plog_err("pkt_size out of range (must be <= %u)\n", max_len);
775 if (pkt_size < min_len) {
776 plog_err("pkt_size out of range (must be >= %u)\n", min_len);
/* Validate every template's size; stops at the first failure. */
783 static int check_all_pkt_size(struct task_gen *task, int do_panic)
786 for (uint32_t i = 0; i < task->n_pkts;++i) {
787 if ((rc = check_pkt_size(task, task->pkt_template[i].len, do_panic)) != 0)
793 static void check_fields_in_bounds(struct task_gen *task)
795 const uint32_t pkt_size = task->pkt_template[0].len;
797 if (task->lat_enabled) {
798 uint32_t pos_beg = task->lat_pos;
799 uint32_t pos_end = task->lat_pos + 3U;
801 PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
802 pos_beg, pos_end, pkt_size);
804 if (task->packet_id_pos) {
805 uint32_t pos_beg = task->packet_id_pos;
806 uint32_t pos_end = task->packet_id_pos + 4U;
808 PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
809 pos_beg, pos_end, pkt_size);
811 if (task->accur_pos) {
812 uint32_t pos_beg = task->accur_pos;
813 uint32_t pos_end = task->accur_pos + 3U;
815 PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u%-u, but packet size is %u bytes\n",
816 pos_beg, pos_end, pkt_size);
/* Recompute the cached l2/l3 header lengths of every template. */
820 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
822 struct pkt_template *template;
824 for (size_t i = 0; i < task->n_pkts; ++i) {
825 template = &task->pkt_template[i];
826 parse_l2_l3_len(template->buf, &template->l2_len, &template->l3_len, template->len);
/* Precompute IP/UDP/TCP checksums in every template and decide whether
   per-packet recalculation will be needed at runtime (i.e. whether any
   runtime-written field lands beyond the L2 header). */
830 static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
832 struct pkt_template *template;
835 task->runtime_checksum_needed = 0;
836 for (size_t i = 0; i < task->n_pkts; ++i) {
837 template = &task->pkt_template[i];
/* l2_len == 0 marks a template whose headers could not be parsed. */
838 if (template->l2_len == 0)
840 ip = (struct ipv4_hdr *)(template->buf + template->l2_len);
842 ip->hdr_checksum = 0;
843 prox_ip_cksum_sw(ip);
844 uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
846 if (ip->next_proto_id == IPPROTO_UDP) {
847 struct udp_hdr *udp = (struct udp_hdr *)(((uint8_t *)ip) + template->l3_len);
848 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
849 } else if (ip->next_proto_id == IPPROTO_TCP) {
850 struct tcp_hdr *tcp = (struct tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
851 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
854 /* The current implementation avoids checksum
855 calculation by determining that at packet
856 construction time, no fields are applied that would
857 require a recalculation of the checksum. */
858 if (task->lat_enabled && task->lat_pos > template->l2_len)
859 task->runtime_checksum_needed = 1;
860 if (task->accur_pos > template->l2_len)
861 task->runtime_checksum_needed = 1;
862 if (task->packet_id_pos > template->l2_len)
863 task->runtime_checksum_needed = 1;
/* Refresh all derived template state: header lengths then checksums. */
867 static void task_gen_pkt_template_recalc_all(struct task_gen *task)
869 task_gen_pkt_template_recalc_metadata(task)
870 task_gen_pkt_template_recalc_checksum(task);
/* Restore every runtime template's length from the pristine originals. */
873 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
875 struct pkt_template *src, *dst;
877 for (size_t i = 0; i < task->n_pkts; ++i) {
878 src = &task->pkt_template_orig[i];
879 dst = &task->pkt_template[i];
/* Restore every runtime template's payload bytes from the originals
   (up to the runtime length, which may have been changed separately). */
884 static void task_gen_reset_pkt_templates_content(struct task_gen *task)
886 struct pkt_template *src, *dst;
888 for (size_t i = 0; i < task->n_pkts; ++i) {
889 src = &task->pkt_template_orig[i];
890 dst = &task->pkt_template[i];
891 memcpy(dst->buf, src->buf, dst->len);
/* Full template reset: lengths, content, then derived metadata. */
895 static void task_gen_reset_pkt_templates(struct task_gen *task)
897 task_gen_reset_pkt_templates_len(task);
898 task_gen_reset_pkt_templates_content(task);
899 task_gen_pkt_template_recalc_all(task);
/* Build the template arrays from a single inline packet given in the
   configuration (targ->pkt_inline), then validate size and field
   positions. Allocation is NUMA-aware via the lcore's socket. */
902 static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
904 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
/* Clamp to the template buffer capacity. */
906 if (targ->pkt_size > sizeof(task->pkt_template[0].buf))
907 targ->pkt_size = sizeof(task->pkt_template[0].buf);
910 size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
911 task->pkt_template = prox_zmalloc(mem_size, socket_id);
912 task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
914 PROX_PANIC(task->pkt_template == NULL ||
915 task->pkt_template_orig == NULL,
916 "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
918 rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, targ->pkt_size);
919 task->pkt_template_orig[0].len = targ->pkt_size;
920 task_gen_reset_pkt_templates(task);
921 check_all_pkt_size(task, 1);
922 check_fields_in_bounds(task);
/* Build the template arrays from a pcap file: count packets, cap the
   count at the configured limit and MAX_TEMPLATE_INDEX, allocate the
   two template arrays and load the packets (no timestamps needed). */
925 static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
927 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
928 char err[PCAP_ERRBUF_SIZE];
929 pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
930 PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
932 task->n_pkts = pcap_count_pkts(handle);
933 plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
936 task->n_pkts = RTE_MIN(task->n_pkts, targ->n_pkts);
937 PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
938 plogx_info("Loading %u packets from pcap\n", task->n_pkts);
939 size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
940 task->pkt_template = prox_zmalloc(mem_size, socket_id);
941 task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
942 PROX_PANIC(task->pkt_template == NULL ||
943 task->pkt_template_orig == NULL,
944 "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
946 pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->pkt_template_orig, NULL);
948 task_gen_reset_pkt_templates(task);
/* Create the mbuf mempool backing the generator's local mbuf cache.
   NOTE(review): the pool name is a fixed string — creating a second
   pool with the same name would fail; presumably the name is made
   unique on lines not visible here. TODO confirm. */
951 static struct rte_mempool *task_gen_create_mempool(struct task_args *targ)
953 static char name[] = "gen_pool";
954 struct rte_mempool *ret;
955 const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
958 ret = rte_mempool_create(name, targ->nb_mbuf - 1, MBUF_SIZE,
959 targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
960 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
962 PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
963 sock_id, targ->nb_mbuf - 1);
/* Public control API — called from the command interface. */
/* Set the remaining number of packets to generate ((uint32_t)-1 = unlimited). */
967 void task_gen_set_pkt_count(struct task_base *tbase, uint32_t count)
969 struct task_gen *task = (struct task_gen *)tbase;
971 task->pkt_count = count;
/* Change the first template's packet size; validates all templates and
   field bounds (non-panicking mode). */
974 int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
976 struct task_gen *task = (struct task_gen *)tbase;
979 task->pkt_template[0].len = pkt_size;
980 if ((rc = check_all_pkt_size(task, 0)) != 0)
982 check_fields_in_bounds(task);
/* Request a new transmit rate; applied lazily by task_gen_update_config. */
986 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
988 struct task_gen *task = (struct task_gen *)tbase;
990 task->new_rate_bps = bps;
/* Clear every configured random field. */
993 void task_gen_reset_randoms(struct task_base *tbase)
995 struct task_gen *task = (struct task_gen *)tbase;
997 for (uint32_t i = 0; i < task->n_rands; ++i) {
998 task->rand[i].rand_mask = 0;
999 task->rand[i].fixed_bits = 0;
1000 task->rand[i].rand_offset = 0;
/* Write a fixed value (1-4 bytes, big-endian) at the given offset of
   every template, then refresh derived metadata/checksums. */
1005 int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
1007 struct task_gen *task = (struct task_gen *)tbase;
1009 for (size_t i = 0; i < task->n_pkts; ++i) {
1010 uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
1011 uint8_t *dst = task->pkt_template[i].buf;
1013 rte_memcpy(dst + offset, &to_write, len);
1016 task_gen_pkt_template_recalc_all(task);
/* Restore template contents from the pristine originals. */
1021 void task_gen_reset_values(struct task_base *tbase)
1023 struct task_gen *task = (struct task_gen *)tbase;
1025 task_gen_reset_pkt_templates_content(task);
/* Number of currently configured random fields. */
1028 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
1030 struct task_gen *task = (struct task_gen *)tbase;
1032 return task->n_rands;
/* Init hook for pcap replay mode: create the mempool, open and count
   the pcap, allocate one contiguous region holding both the template
   array and the per-packet tsc array, and load packets + timestamps. */
1035 static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
1037 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1038 const uint32_t sockid = rte_lcore_to_socket_id(targ->lconf->id);
1040 task->loop = targ->loop;
1042 task->hz = rte_get_tsc_hz();
1044 task->local_mbuf.mempool = task_gen_create_mempool(targ);
1046 PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
1048 char err[PCAP_ERRBUF_SIZE];
1049 pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
1050 PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
1052 task->n_pkts = pcap_count_pkts(handle);
1053 plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
1056 plogx_info("Configured to load %u packets\n", targ->n_pkts);
1057 if (task->n_pkts > targ->n_pkts)
1058 task->n_pkts = targ->n_pkts;
1060 PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
1062 plogx_info("Loading %u packets from pcap\n", task->n_pkts);
/* Single allocation: templates first, then the parallel tsc array. */
1064 size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
1065 uint8_t *mem = prox_zmalloc(mem_size, sockid);
1067 PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
1068 task->proto = (struct pkt_template *) mem;
1069 task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
1071 pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc);
/* Find the index of an existing random configured at `offset`;
   presumably returns UINT32_MAX when none matches (return statements
   are on lines not visible here — TODO confirm). */
1075 static int task_gen_find_random_with_offset(struct task_gen *task, uint32_t offset)
1077 for (uint32_t i = 0; i < task->n_rands; ++i) {
1078 if (task->rand[i].rand_offset == offset) {
/* Add (or overwrite) a random field parsed from rand_str at the given
   packet offset. Randoms force runtime checksum recalculation. */
1086 int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t offset, uint32_t rand_id)
1088 struct task_gen *task = (struct task_gen *)tbase;
1089 uint32_t existing_rand;
1091 if (rand_id == UINT32_MAX && task->n_rands == 64) {
1092 plog_err("Too many randoms\n");
1095 uint32_t mask, fixed, len;
1097 if (parse_random_str(&mask, &fixed, &len, rand_str)) {
1098 plog_err("%s\n", get_parse_err());
/* Randoms may touch L3/L4 payload, so checksums must be redone. */
1101 task->runtime_checksum_needed = 1;
1103 existing_rand = task_gen_find_random_with_offset(task, offset);
1104 if (existing_rand != UINT32_MAX) {
1105 plog_warn("Random at offset %d already set => overwriting len = %d %s\n", offset, len, rand_str);
1106 rand_id = existing_rand;
1107 task->rand[rand_id].rand_len = len;
1108 task->rand[rand_id].rand_offset = offset;
1109 task->rand[rand_id].rand_mask = mask;
1110 task->rand[rand_id].fixed_bits = fixed;
/* New random: append at the end of the array. */
1114 task->rand[task->n_rands].rand_len = len;
1115 task->rand[task->n_rands].rand_offset = offset;
1116 task->rand[task->n_rands].rand_mask = mask;
1117 task->rand[task->n_rands].fixed_bits = fixed;
/* Task start hook for the (non-pcap) generator: reset the transmit state
 * and, when running in L3 mode (a master task is present), announce this
 * task's IPs to the control plane so ARP can be handled.
 * NOTE(review): lines 1131-1132 and the closing braces are missing from
 * this chunk; lines 1133-1138 below are the body of a block comment whose
 * opening and closing delimiters were dropped by the extraction — they are
 * a TODO note from the original authors, not executable code. */
1123 static void start(struct task_base *tbase)
1125 struct task_gen *task = (struct task_gen *)tbase;
1126 task->pkt_queue_index = 0;
1128 task_gen_reset_token_time(task);
1129 if (tbase->l3.tmaster) {
1130 register_all_ip_to_ctrl_plane(task);
1133 Handle the case when two tasks transmit to the same port
1134 and one of them is stopped. In that case ARP (requests or replies)
1135 might not be sent. Master will have to keep a list of rings.
1136 stop will have to de-register IP from ctrl plane.
1137 un-registration will remove the ring. when having more than
1138 one active rings, master can always use the first one
/* Task start hook for the pcap-replay generator.  Backdates last_tsc by the
 * first packet's inter-arrival delta so that the very first packet is
 * eligible for transmission immediately rather than after its recorded gap.
 * NOTE(review): the line-number gap after 1146 shows at least one statement
 * (plus the closing brace) is missing from this chunk — in the upstream
 * source the packet replay index is also reset here; confirm against the
 * full file. */
1142 static void start_pcap(struct task_base *tbase)
1144 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1145 /* When we start, the first packet is sent immediately. */
1146 task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
/* Early init: assign this task a unique, monotonically increasing generator
 * id.  A single process-wide counter is kept in the prox shared-data store
 * under the key "generator_count"; the first generator task to run creates
 * it (zero-initialized by prox_zmalloc).
 * NOTE(review): the line-number gap after 1156 suggests an allocation-check
 * (PROX_PANIC on NULL) line plus braces are missing from this chunk. */
1150 static void init_task_gen_early(struct task_args *targ)
1152 uint8_t *generator_count = prox_sh_find_system("generator_count");
1154 if (generator_count == NULL) {
1155 generator_count = prox_zmalloc(sizeof(*generator_count), 0);
1156 prox_sh_add_system("generator_count", generator_count);
/* Hand out the current count as this task's id, then bump the shared
 * counter for the next generator. */
1158 targ->generator_id = *generator_count;
1159 (*generator_count)++;
/* Main init for the packet-generator task (both "gen" and "gen l3" modes).
 * Copies configuration from task_args into the runtime task struct, creates
 * the per-task mbuf mempool, seeds the PRNG state, loads packet templates
 * (inline definition or pcap file), optionally rewrites the source MAC, and
 * installs any configured random-field rules.
 * NOTE(review): many physical lines are missing from this chunk (else
 * branches, closing braces, and probably #if/#else blocks) — comments below
 * describe only the visible statements. */
1162 static void init_task_gen(struct task_base *tbase, struct task_args *targ)
1164 struct task_gen *task = (struct task_gen *)tbase;
1166 task->packet_id_pos = targ->packet_id_pos;
1168 task->local_mbuf.mempool = task_gen_create_mempool(targ);
1169 PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
1171 task->hz = rte_get_tsc_hz();
/* Byte offsets inside the generated packet where latency timestamp,
 * accuracy and signature fields are written (0 = feature unused). */
1172 task->lat_pos = targ->lat_pos;
1173 task->accur_pos = targ->accur_pos;
1174 task->sig_pos = targ->sig_pos;
1175 task->sig = targ->sig;
1176 task->new_rate_bps = targ->rate_bps;
/* Token bucket used for rate limiting; 1250000000 bytes/sec corresponds to
 * 10 Gbps line rate.  -1 presumably means an unlimited bucket depth —
 * confirm against token_time_cfg_create(). */
1178 struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
1180 token_time_init(&task->token_time, &tt_cfg);
1181 init_task_gen_seeds(task);
/* Clamp bulk sizes to sane bounds: at least 1, at most 64 mbufs per burst. */
1183 task->min_bulk_size = targ->min_bulk_size;
1184 task->max_bulk_size = targ->max_bulk_size;
1185 if (task->min_bulk_size < 1)
1186 task->min_bulk_size = 1;
1187 if (task->max_bulk_size < 1)
1188 task->max_bulk_size = 64;
1189 PROX_PANIC(task->max_bulk_size > 64, "max_bulk_size higher than 64\n");
1190 PROX_PANIC(task->max_bulk_size < task->min_bulk_size, "max_bulk_size must be > than min_bulk_size\n");
/* -1 on an unsigned counter == "no packet-count limit". */
1192 task->pkt_count = -1;
1193 task->lat_enabled = targ->lat_enabled;
1194 task->runtime_flags = targ->runtime_flags;
1195 PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat not enabled by lat pos or accur pos configured\n");
1197 task->generator_id = targ->generator_id;
/* Default: unknown link speed; when transmitting straight to exactly one
 * physical port (no tx rings), assume 10 Gbps (1250000000 bytes/sec). */
1198 task->link_speed = UINT64_MAX;
1199 if (targ->nb_txrings == 0 && targ->nb_txports == 1)
1200 task->link_speed = 1250000000;
/* Packet templates: inline "pkt inline" definition when no pcap file is
 * configured, otherwise load them from the pcap file.  NOTE(review): the
 * else keyword/braces between 1204 and 1206 are among the missing lines. */
1202 if (!strcmp(targ->pcap_file, "")) {
1203 plog_info("\tUsing inline definition of a packet\n");
1204 task_init_gen_load_pkt_inline(task, targ);
1206 plog_info("Loading from pcap %s\n", targ->pcap_file);
1207 task_init_gen_load_pcap(task, targ);
/* Unless the config asked to keep the template's source MAC, stamp the tx
 * port's own MAC address into bytes 6..11 (Ethernet source) of every
 * template. */
1210 if ((targ->flags & DSF_KEEP_SRC_MAC) == 0 && (targ->nb_txrings || targ->nb_txports)) {
1211 uint8_t *src_addr = prox_port_cfg[tbase->tx_params_hw.tx_port_queue->port].eth_addr.addr_bytes;
1212 for (uint32_t i = 0; i < task->n_pkts; ++i) {
1213 rte_memcpy(&task->pkt_template[i].buf[6], src_addr, 6);
1216 memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(struct ether_addr));
/* Install each configured random-field rule; UINT32_MAX = append new slot. */
1217 for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
1218 PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
1219 "Failed to add random\n");
/* Record whether the (reachable) tx port can offload checksum computation. */
1222 struct prox_port_cfg *port = find_reachable_port(targ);
1224 task->cksum_offload = port->capabilities.tx_offload_cksum;
/* Registration descriptor for the base "gen" task mode.
 * NOTE(review): the two consecutive .flag_features initializers below are
 * almost certainly the two arms of a dropped #ifdef SOFT_CRC / #else /
 * #endif block (the extraction removed the preprocessor lines); as written,
 * plain C would take the last initializer.  Confirm against the full file. */
1228 static struct task_init task_init_gen = {
1230 .init = init_task_gen,
1231 .handle = handle_gen_bulk,
1234 // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
1235 // vector mode is used by DPDK, resulting (theoretically) in higher performance.
1236 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
1238 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1240 .size = sizeof(struct task_gen)
/* Registration descriptor for the "gen l3" sub-mode (same init/handle as
 * plain gen; L3/ARP handling is selected through the sub_mode string).
 * NOTE(review): as in task_init_gen above, the duplicated .flag_features
 * initializers are the two arms of a dropped #ifdef SOFT_CRC block. */
1243 static struct task_init task_init_gen_l3 = {
1245 .sub_mode_str = "l3",
1246 .init = init_task_gen,
1247 .handle = handle_gen_bulk,
1250 // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
1251 // vector mode is used by DPDK, resulting (theoretically) in higher performance.
1252 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
1254 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1256 .size = sizeof(struct task_gen)
/* Registration descriptor for the "gen pcap" sub-mode: replays a capture
 * file with its recorded timing (init/handle/start differ from plain gen,
 * and the task struct is the smaller task_gen_pcap).
 * NOTE(review): duplicated .flag_features initializers = dropped
 * #ifdef SOFT_CRC / #else arms, as in the descriptors above. */
1259 static struct task_init task_init_gen_pcap = {
1261 .sub_mode_str = "pcap",
1262 .init = init_task_gen_pcap,
1263 .handle = handle_gen_pcap_bulk,
1264 .start = start_pcap,
1266 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
1268 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1270 .size = sizeof(struct task_gen_pcap)
/* Runs automatically before main() (GCC constructor attribute): registers
 * the three generator task modes with the prox task framework so they can
 * be selected by name from the configuration file. */
1273 __attribute__((constructor)) static void reg_task_gen(void)
1275 reg_task(&task_init_gen);
1276 reg_task(&task_init_gen_l3);
1277 reg_task(&task_init_gen_pcap);