2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
21 #include <rte_cycles.h>
22 #include <rte_version.h>
23 #include <rte_byteorder.h>
24 #include <rte_ether.h>
25 #include <rte_hash_crc.h>
27 #include "prox_shared.h"
29 #include "prox_malloc.h"
30 #include "handle_gen.h"
31 #include "handle_lat.h"
32 #include "task_init.h"
33 #include "task_base.h"
34 #include "prox_port_cfg.h"
39 #include "mbuf_utils.h"
41 #include "prox_cksum.h"
43 #include "prox_assert.h"
45 #include "token_time.h"
46 #include "local_mbuf.h"
49 #include "handle_master.h"
// Scratch buffer sized for a maximum-length Ethernet frame.
55 uint8_t buf[ETHER_MAX_LEN];
// Maximum number of packet templates; a power of two so the mask below can
// extract a template index from mbuf->udata64.
58 #define MAX_TEMPLATE_INDEX 65536
59 #define TEMPLATE_INDEX_MASK (MAX_TEMPLATE_INDEX - 1)
// Flag bit stored in mbuf->udata64 (just above the template index bits)
// marking packets replaced by ARP; tested as (udata64 & MBUF_ARP) below.
60 #define MBUF_ARP MAX_TEMPLATE_INDEX
// Expand a host-order IPv4 address into four dotted-quad bytes for printing.
// NOTE(review): 'x' is unparenthesized in the first and last terms — only
// pass a plain identifier, never an expression.
62 #define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
// Initialise an mbuf from a packet template: set both mbuf packet length and
// data length to the template length, then copy the template bytes into the
// mbuf data area pointed to by 'pkt'.
64 static void pkt_template_init_mbuf(struct pkt_template *pkt_template, struct rte_mbuf *mbuf, uint8_t *pkt)
66 const uint32_t pkt_size = pkt_template->len;
68 rte_pktmbuf_pkt_len(mbuf) = pkt_size;
69 rte_pktmbuf_data_len(mbuf) = pkt_size;
71 rte_memcpy(pkt, pkt_template->buf, pkt_template->len);
// Per-task state for the pcap replay generator (see handle_gen_pcap_bulk).
74 struct task_gen_pcap {
75 struct task_base base;
77 struct local_mbuf local_mbuf;
// Packet templates loaded from the pcap file.
79 struct pkt_template *proto;
// NOTE(review): the listing jumps from line 79 to 87 here; the remaining
// task_gen_pcap fields and the 'struct task_gen {' header line are not
// visible. The fields below belong to struct task_gen, the main generator.
87 struct task_base base;
// Token bucket used for byte-based rate limiting.
90 struct token_time token_time;
91 struct local_mbuf local_mbuf;
92 struct pkt_template *pkt_template; /* packet templates used at runtime */
93 uint64_t write_duration_estimate; /* how long it took previously to write the time stamps in the packets */
94 uint64_t earliest_tsc_next_pkt;
95 uint64_t new_rate_bps;
96 uint64_t pkt_queue_index;
97 uint32_t n_pkts; /* number of packets in pcap */
98 uint32_t pkt_idx; /* current packet from pcap */
99 uint32_t pkt_count; /* how many pakets to generate */
100 uint32_t runtime_flags;
102 uint16_t packet_id_pos;
106 uint8_t generator_id;
107 uint8_t n_rands; /* number of randoms */
108 uint8_t min_bulk_size;
109 uint8_t max_bulk_size;
111 uint8_t runtime_checksum_needed;
// Descriptors for the random fields written into each generated packet
// (consumed by task_gen_apply_random_fields).
114 uint32_t rand_mask; /* since the random vals are uniform, masks don't introduce bias */
115 uint32_t fixed_bits; /* length of each random (max len = 4) */
116 uint16_t rand_offset; /* each random has an offset*/
117 uint8_t rand_len; /* # bytes to take from random (no bias introduced) */
// Per-packet tsc offsets for one burst (max 64 packets).
120 uint64_t pkt_tsc_offset[64];
121 struct pkt_template *pkt_template_orig; /* packet templates (from inline or from pcap) */
122 struct ether_addr src_mac;
124 uint8_t cksum_offload;
125 struct prox_port_cfg *port;
126 } __rte_cache_aligned;
// Return the IPv4 header length in bytes, warning if the version field is
// not 4. NOTE(review): the statement(s) following the warning (lines
// 135-136) are not visible in this listing — presumably an early return.
128 static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
130 /* Optimize for common case of IPv4 header without options. */
131 if (ip->version_ihl == 0x45)
132 return sizeof(struct ipv4_hdr);
133 if (unlikely(ip->version_ihl >> 4 != 4)) {
134 plog_warn("IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
// IHL counts 32-bit words; convert to bytes.
137 return (ip->version_ihl & 0xF) * 4;
// Compute the L2 length (Ethernet plus any stacked 802.1ad/802.1Q VLAN tags)
// and, for IPv4 payloads, the L3 header length of the packet in 'pkt'.
// NOTE(review): most of the switch on ether_type (lines 160-172, 174-177) is
// not visible in this listing; only the default warning and IPv4 tail remain.
140 static void parse_l2_l3_len(uint8_t *pkt, uint16_t *l2_len, uint16_t *l3_len, uint16_t len)
142 *l2_len = sizeof(struct ether_hdr);
144 struct vlan_hdr *vlan_hdr;
145 struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
147 uint16_t ether_type = eth_hdr->ether_type;
// Skip all VLAN headers, bounded by the packet length.
150 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (*l2_len + sizeof(struct vlan_hdr) < len)) {
151 vlan_hdr = (struct vlan_hdr *)(pkt + *l2_len);
153 ether_type = vlan_hdr->eth_proto;
156 // No L3 cksum offload for IPv6, but TODO L4 offload
157 // ETYPE_EoGRE CRC not implemented yet
159 switch (ether_type) {
173 plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
// Recognised as IPv4: record the L3 header length.
178 struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + *l2_len);
179 *l3_len = ipv4_get_hdr_len(ip);
// Recompute the IP (and UDP/TCP) checksums of one packet, either in
// software or via hardware offload depending on 'cksum_offload', using the
// l2/l3 lengths cached in the packet template.
183 static void checksum_packet(uint8_t *hdr, struct rte_mbuf *mbuf, struct pkt_template *pkt_template, int cksum_offload)
185 uint16_t l2_len = pkt_template->l2_len;
186 uint16_t l3_len = pkt_template->l3_len;
189 struct ipv4_hdr *ip = (struct ipv4_hdr*)(hdr + l2_len);
190 prox_ip_udp_cksum(mbuf, ip, l2_len, l3_len, cksum_offload);
// Re-arm the rate-limiting token bucket with the newly requested rate and
// restart its clock from the current tsc.
194 static void task_gen_reset_token_time(struct task_gen *task)
196 token_time_set_bpp(&task->token_time, task->new_rate_bps);
197 token_time_reset(&task->token_time, rte_rdtsc(), 0);
// Decrement the remaining packet budget by the burst size just sent.
// (uint32_t)-1 means "unlimited" and is left untouched.
200 static void task_gen_take_count(struct task_gen *task, uint32_t send_bulk)
202 if (task->pkt_count == (uint32_t)-1)
205 if (task->pkt_count >= send_bulk)
206 task->pkt_count -= send_bulk;
// Replay packets from a pcap file, pacing transmissions according to the
// recorded inter-packet gaps (task->proto_tsc). Sends at most one burst
// (64 packets) of packets that are already "due" at the current tsc.
212 static int handle_gen_pcap_bulk(struct task_base *tbase, struct rte_mbuf **mbuf, uint16_t n_pkts)
214 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
215 uint64_t now = rte_rdtsc();
216 uint64_t send_bulk = 0;
217 uint32_t pkt_idx_tmp = task->pkt_idx;
// End of the pcap reached: only legal when looping is enabled.
219 if (pkt_idx_tmp == task->n_pkts) {
220 PROX_ASSERT(task->loop);
// Count how many packets are already due, capped at a burst of 64.
224 for (uint16_t j = 0; j < 64; ++j) {
225 uint64_t tsc = task->proto_tsc[pkt_idx_tmp];
226 if (task->last_tsc + tsc <= now) {
227 task->last_tsc += tsc;
230 if (pkt_idx_tmp == task->n_pkts) {
// Take mbufs for the due packets and fill them from the templates.
241 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
242 if (new_pkts == NULL)
245 for (uint16_t j = 0; j < send_bulk; ++j) {
246 struct rte_mbuf *next_pkt = new_pkts[j];
247 struct pkt_template *pkt_template = &task->proto[task->pkt_idx];
248 uint8_t *hdr = rte_pktmbuf_mtod(next_pkt, uint8_t *);
250 pkt_template_init_mbuf(pkt_template, next_pkt, hdr);
253 if (task->pkt_idx == task->n_pkts) {
261 return task->base.tx_pkt(&task->base, new_pkts, send_bulk, NULL);
// Convert a byte count into a tsc tick duration at the current link speed.
// link_speed is stored in bytes per second; hz is the tsc frequency.
// NOTE(review): the body of the UINT64_MAX branch (line 270) is not visible
// in this listing — presumably it returns 0 ("infinite" link speed).
264 static uint64_t bytes_to_tsc(struct task_gen *task, uint32_t bytes)
266 const uint64_t hz = task->hz;
267 const uint64_t bytes_per_hz = task->link_speed;
269 if (bytes_per_hz == UINT64_MAX)
272 return hz * bytes / bytes_per_hz;
275 static uint32_t task_gen_next_pkt_idx(const struct task_gen *task, uint32_t pkt_idx)
277 return pkt_idx + 1 == task->n_pkts? 0 : pkt_idx + 1;
280 static uint32_t task_gen_offset_pkt_idx(const struct task_gen *task, uint32_t offset)
282 return (task->pkt_idx + offset) % task->n_pkts;
// Decide how many packets to send in the next burst. The size is bounded
// by max_bulk_size, by the remaining pkt_count budget, and by the bytes
// available in the token bucket; returns a bulk of 0 if fewer than
// min_bulk_size packets are affordable. *total_bytes receives the wire
// bytes the chosen bulk will consume.
285 static uint32_t task_gen_calc_send_bulk(const struct task_gen *task, uint32_t *total_bytes)
287 /* The biggest bulk we allow to send is task->max_bulk_size
288 packets. The max bulk size can also be limited by the
289 pkt_count field. At the same time, we are rate limiting
290 based on the specified speed (in bytes per second) so token
291 bucket based rate limiting must also be applied. The
292 minimum bulk size is also constrained. If the calculated
293 bulk size is less then the minimum, then don't send
296 const uint32_t min_bulk = task->min_bulk_size;
297 uint32_t max_bulk = task->max_bulk_size;
299 if (task->pkt_count != (uint32_t)-1 && task->pkt_count < max_bulk) {
300 max_bulk = task->pkt_count;
303 uint32_t send_bulk = 0;
304 uint32_t pkt_idx_tmp = task->pkt_idx;
305 uint32_t would_send_bytes = 0;
309 * TODO - this must be improved to take into account the fact that, after applying randoms
310 * The packet can be replaced by an ARP
312 for (uint16_t j = 0; j < max_bulk; ++j) {
313 struct pkt_template *pktpl = &task->pkt_template[pkt_idx_tmp];
314 pkt_size = pktpl->len;
315 uint32_t pkt_len = pkt_len_to_wire_size(pkt_size);
// Stop as soon as the burst would exceed the available token bytes.
316 if (pkt_len + would_send_bytes > task->token_time.bytes_now)
319 pkt_idx_tmp = task_gen_next_pkt_idx(task, pkt_idx_tmp);
322 would_send_bytes += pkt_len;
325 if (send_bulk < min_bulk)
327 *total_bytes = would_send_bytes;
// Overwrite the configured random fields of one packet. Each random value
// is masked, OR-ed with its fixed bits, byte-swapped to big-endian, and the
// low rand_len bytes are copied to rand_offset inside the packet.
331 static void task_gen_apply_random_fields(struct task_gen *task, uint8_t *hdr)
333 uint32_t ret, ret_tmp;
335 for (uint16_t i = 0; i < task->n_rands; ++i) {
336 ret = random_next(&task->rand[i].state);
337 ret_tmp = (ret & task->rand[i].rand_mask) | task->rand[i].fixed_bits;
339 ret_tmp = rte_bswap32(ret_tmp);
340 /* At this point, the lower order bytes (BE) contain
341 the generated value. The address where the values
342 of interest starts is at ret_tmp + 4 - rand_len. */
343 uint8_t *pret_tmp = (uint8_t*)&ret_tmp;
344 rte_memcpy(hdr + task->rand[i].rand_offset, pret_tmp + 4 - task->rand[i].rand_len, task->rand[i].rand_len);
// Apply the random fields to every packet header of the burst.
348 static void task_gen_apply_all_random_fields(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
353 for (uint16_t i = 0; i < count; ++i)
354 task_gen_apply_random_fields(task, pkt_hdr[i]);
// Write the latency-accuracy value at the configured offset in one packet.
357 static void task_gen_apply_accur_pos(struct task_gen *task, uint8_t *pkt_hdr, uint32_t accuracy)
359 *(uint32_t *)(pkt_hdr + task->accur_pos) = accuracy;
// Write the generator signature at the configured offset in one packet.
362 static void task_gen_apply_sig(struct task_gen *task, uint8_t *pkt_hdr)
364 *(uint32_t *)(pkt_hdr + task->sig_pos) = task->sig;
// Stamp the stored accuracy values into every non-ARP packet of the burst.
// Packet N carries the accuracy measured for packet N-64 (64-entry ring).
367 static void task_gen_apply_all_accur_pos(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
369 if (!task->accur_pos)
372 /* The accuracy of task->pkt_queue_index - 64 is stored in
373 packet task->pkt_queue_index. The ID modulo 64 is the
375 for (uint16_t j = 0; j < count; ++j) {
376 if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
377 uint32_t accuracy = task->accur[(task->pkt_queue_index + j) & 63];
378 task_gen_apply_accur_pos(task, pkt_hdr[j], accuracy);
// Stamp the signature into every non-ARP packet of the burst.
383 static void task_gen_apply_all_sig(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
388 for (uint16_t j = 0; j < count; ++j) {
389 if ((mbufs[j]->udata64 & MBUF_ARP) == 0) {
390 task_gen_apply_sig(task, pkt_hdr[j]);
// Copy a unique packet id to the configured offset in one packet.
395 static void task_gen_apply_unique_id(struct task_gen *task, uint8_t *pkt_hdr, const struct unique_id *id)
397 struct unique_id *dst = (struct unique_id *)(pkt_hdr + task->packet_id_pos);
// Give every non-ARP packet in the burst a fresh (generator_id, sequence)
// unique id; pkt_queue_index is the monotonically increasing sequence.
402 static void task_gen_apply_all_unique_id(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
404 if (!task->packet_id_pos)
407 for (uint16_t i = 0; i < count; ++i) {
408 if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
410 unique_id_init(&id, task->generator_id, task->pkt_queue_index++);
411 task_gen_apply_unique_id(task, pkt_hdr[i], &id);
// Recompute checksums for the burst when runtime writes (latency stamps,
// unique ids, randoms) may have invalidated them. task->pkt_idx has already
// advanced past the burst, so step back 'count' templates to pair each
// packet with its template. ARP-replaced packets are skipped.
416 static void task_gen_checksum_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
418 if (!(task->runtime_flags & TASK_TX_CRC))
421 if (!task->runtime_checksum_needed)
424 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - count);
425 for (uint16_t i = 0; i < count; ++i) {
426 if ((mbufs[i]->udata64 & MBUF_ARP) == 0) {
427 struct pkt_template *pkt_template = &task->pkt_template[pkt_idx];
428 checksum_packet(pkt_hdr[i], mbufs[i], pkt_template, task->cksum_offload);
429 pkt_idx = task_gen_next_pkt_idx(task, pkt_idx);
// Consume 'tokens' bytes from the bucket. If a full max-size burst was just
// sent and the balance is still growing, clamp it: we cannot keep up, so
// leave a gap in the stream rather than accumulating an unbounded backlog.
434 static void task_gen_consume_tokens(struct task_gen *task, uint32_t tokens, uint32_t send_count)
436 /* If max burst has been sent, we can't keep up so just assume
437 that we can (leaving a "gap" in the packet stream on the
439 task->token_time.bytes_now -= tokens;
440 if (send_count == task->max_bulk_size && task->token_time.bytes_now > tokens) {
441 task->token_time.bytes_now = tokens;
// Total tsc duration of the burst: the tsc offset of the last packet plus
// the wire-time of that last packet. task->pkt_idx already points past the
// burst, so offset -1 retrieves the last template sent.
445 static uint64_t task_gen_calc_bulk_duration(struct task_gen *task, uint32_t count)
447 uint32_t pkt_idx = task_gen_offset_pkt_idx(task, - 1);
448 struct pkt_template *last_pkt_template = &task->pkt_template[pkt_idx];
449 uint32_t last_pkt_len = pkt_len_to_wire_size(last_pkt_template->len);
450 uint64_t last_pkt_duration = bytes_to_tsc(task, last_pkt_len);
451 uint64_t bulk_duration = task->pkt_tsc_offset[count - 1] + last_pkt_duration;
453 return bulk_duration;
// Write a transmit time stamp into each packet of the burst at lat_pos.
// The stamp is the estimated actual tx time (now + previous write-duration
// estimate + per-packet offset), kept monotonically increasing via delta_t.
// Returns the tsc read just before transmission (0 if latency disabled).
456 static uint64_t task_gen_write_latency(struct task_gen *task, uint8_t **pkt_hdr, uint32_t count)
458 if (!task->lat_enabled)
461 uint64_t tx_tsc, delta_t;
462 uint64_t tsc_before_tx = 0;
464 /* Just before sending the packets, apply the time stamp
465 relative to when the first packet will be sent. The first
466 packet will be sent now. The time is read for each packet
467 to reduce the error towards the actual time the packet will
469 uint64_t write_tsc_after, write_tsc_before;
471 write_tsc_before = rte_rdtsc();
473 /* The time it took previously to write the time stamps in the
474 packets is used as an estimate for how long it will take to
475 write the time stamps now. The estimated time at which the
476 packets will actually be sent will be at tx_tsc. */
477 tx_tsc = write_tsc_before + task->write_duration_estimate;
479 /* The offset delta_t tracks the difference between the actual
480 time and the time written in the packets. Adding the offset
481 to the actual time insures that the time written in the
482 packets is monotonically increasing. At the same time,
483 simply sleeping until delta_t is zero would leave a period
484 of silence on the line. The error has been introduced
485 earlier, but the packets have already been sent. */
486 if (tx_tsc < task->earliest_tsc_next_pkt)
487 delta_t = task->earliest_tsc_next_pkt - tx_tsc;
// Stamp each packet, truncated to LATENCY_ACCURACY resolution.
491 for (uint16_t i = 0; i < count; ++i) {
492 uint32_t *pos = (uint32_t *)(pkt_hdr[i] + task->lat_pos);
493 const uint64_t pkt_tsc = tx_tsc + delta_t + task->pkt_tsc_offset[i];
495 *pos = pkt_tsc >> LATENCY_ACCURACY;
498 uint64_t bulk_duration = task_gen_calc_bulk_duration(task, count);
500 task->earliest_tsc_next_pkt = tx_tsc + delta_t + bulk_duration;
501 write_tsc_after = rte_rdtsc();
// Remember how long the stamping took for the next estimate.
502 task->write_duration_estimate = write_tsc_after - write_tsc_before;
504 /* Make sure that the time stamps that were written
505 are valid. The offset must be taken into account */
507 tsc_before_tx = rte_rdtsc();
508 } while (tsc_before_tx < tx_tsc);
509 return tsc_before_tx;
// Record, for each packet just sent, the delay between the pre-tx tsc and
// now into the 64-entry accuracy ring; these values are written into future
// packets by task_gen_apply_all_accur_pos.
512 static void task_gen_store_accuracy(struct task_gen *task, uint32_t count, uint64_t tsc_before_tx)
514 if (!task->accur_pos)
517 uint64_t accur = rte_rdtsc() - tsc_before_tx;
518 uint64_t first_accuracy_idx = task->pkt_queue_index - count;
520 for (uint32_t i = 0; i < count; ++i) {
521 uint32_t accuracy_idx = (first_accuracy_idx + i) & 63;
523 task->accur[accuracy_idx] = accur;
// Prefetch all mbufs of a burst, then resolve and prefetch their data
// pointers, to hide memory latency before the packets are built.
527 static void task_gen_load_and_prefetch(struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
529 for (uint16_t i = 0; i < count; ++i)
530 rte_prefetch0(mbufs[i]);
531 for (uint16_t i = 0; i < count; ++i)
532 pkt_hdr[i] = rte_pktmbuf_mtod(mbufs[i], uint8_t *);
533 for (uint16_t i = 0; i < count; ++i)
534 rte_prefetch0(pkt_hdr[i]);
// Fill each mbuf of the burst from the current template, tag it with the
// template index (in udata64), and — when latency is enabled — precompute
// per-packet tsc offsets from the cumulative wire bytes already queued.
537 static void task_gen_build_packets(struct task_gen *task, struct rte_mbuf **mbufs, uint8_t **pkt_hdr, uint32_t count)
539 uint64_t will_send_bytes = 0;
541 for (uint16_t i = 0; i < count; ++i) {
542 struct pkt_template *pktpl = &task->pkt_template[task->pkt_idx];
// NOTE(review): 'pktpl' and 'pkt_template' alias the same template and
// 'hdr' below looks unused here — the missing lines of this listing
// (540, 550, 552-554) presumably use them; confirm against full source.
543 struct pkt_template *pkt_template = &task->pkt_template[task->pkt_idx];
544 pkt_template_init_mbuf(pkt_template, mbufs[i], pkt_hdr[i]);
545 mbufs[i]->udata64 = task->pkt_idx & TEMPLATE_INDEX_MASK;
546 struct ether_hdr *hdr = (struct ether_hdr *)pkt_hdr[i];
547 if (task->lat_enabled) {
548 task->pkt_tsc_offset[i] = bytes_to_tsc(task, will_send_bytes);
549 will_send_bytes += pkt_len_to_wire_size(pkt_template->len);
551 task->pkt_idx = task_gen_next_pkt_idx(task, task->pkt_idx);
// Re-arm the token bucket if the requested rate changed since last burst.
555 static void task_gen_update_config(struct task_gen *task)
557 if (task->token_time.cfg.bpp != task->new_rate_bps)
558 task_gen_reset_token_time(task);
// Recursively enumerate every concrete value reachable through 'mask'
// (trying each maskable bit as 0 and 1) and register each resulting IPv4
// address (val | fixed_bits, converted to BE) with the master control task.
// NOTE(review): the recursion's terminating condition lines are not visible
// in this listing.
561 static inline void build_value(struct task_gen *task, uint32_t mask, int bit_pos, uint32_t val, uint32_t fixed_bits)
563 struct task_base *tbase = (struct task_base *)task;
565 build_value(task, mask >> 1, bit_pos + 1, val, fixed_bits);
567 build_value(task, mask >> 1, bit_pos + 1, val | (1 << bit_pos), fixed_bits);
570 register_ip_to_ctrl_plane(tbase->l3.tmaster, rte_cpu_to_be_32(val | fixed_bits), tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
// For every packet template, locate the IPv4 source address (skipping VLAN
// and MPLS headers) and register it with the control plane. When a random
// field overlaps the source address, enumerate (via build_value) all the
// addresses that random can generate and register each of them.
573 static inline void register_all_ip_to_ctrl_plane(struct task_gen *task)
575 struct task_base *tbase = (struct task_base *)task;
580 for (uint32_t i = 0; i < task->n_pkts; ++i) {
581 struct pkt_template *pktpl = &task->pkt_template[i];
582 unsigned int ip_src_pos = 0;
584 unsigned int l2_len = sizeof(struct ether_hdr);
586 uint8_t *pkt = pktpl->buf;
587 struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
588 uint16_t ether_type = eth_hdr->ether_type;
589 struct vlan_hdr *vlan_hdr;
// Walk over stacked VLAN headers, bounded by the template length.
592 while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(struct vlan_hdr) < pktpl->len)) {
593 vlan_hdr = (struct vlan_hdr *)(pkt + l2_len);
595 ether_type = vlan_hdr->eth_proto;
// MPLS may carry IPv4; the handling lines (598-600) are not all visible.
597 if ((ether_type == ETYPE_MPLSU) || (ether_type == ETYPE_MPLSM)) {
601 if ((ether_type != ETYPE_IPv4) && !maybe_ipv4)
604 struct ipv4_hdr *ip = (struct ipv4_hdr *)(pkt + l2_len);
605 PROX_PANIC(ip->version_ihl >> 4 != 4, "IPv4 ether_type but IP version = %d != 4", ip->version_ihl >> 4);
607 // Even if IPv4 header contains options, options are after ip src and dst
608 ip_src_pos = l2_len + sizeof(struct ipv4_hdr) - 2 * sizeof(uint32_t);
609 uint32_t *ip_src = ((uint32_t *)(pktpl->buf + ip_src_pos));
610 plog_info("\tip_src_pos = %d, ip_src = %x\n", ip_src_pos, *ip_src);
611 register_ip_to_ctrl_plane(tbase->l3.tmaster, *ip_src, tbase->l3.reachable_port_id, tbase->l3.core_id, tbase->l3.task_id);
// If a random field overlaps the source address, register every address
// value the random can produce.
613 for (int j = 0; j < task->n_rands; j++) {
614 offset = task->rand[j].rand_offset;
615 len = task->rand[j].rand_len;
616 mask = task->rand[j].rand_mask;
617 fixed = task->rand[j].fixed_bits;
618 plog_info("offset = %d, len = %d, mask = %x, fixed = %x\n", offset, len, mask, fixed);
619 if ((offset < ip_src_pos + 4) && (offset + len >= ip_src_pos)) {
// Random starts inside the address: restrict its mask to the overlap.
620 if (offset >= ip_src_pos) {
621 int32_t ip_src_mask = (1 << (4 + ip_src_pos - offset) * 8) - 1;
622 mask = mask & ip_src_mask;
623 fixed = (fixed & ip_src_mask) | (rte_be_to_cpu_32(*ip_src) & ~ip_src_mask);
624 build_value(task, mask, 0, 0, fixed);
// Random starts before the address: shift fixed bits into position.
626 int32_t bits = ((ip_src_pos + 4 - offset - len) * 8);
628 fixed = (fixed << bits) | (rte_be_to_cpu_32(*ip_src) & ((1 << bits) - 1));
629 build_value(task, mask, 0, 0, fixed);
// Main fast-path handler of the traffic generator: refresh the link speed
// and rate config, size a burst within the token budget, build and stamp
// the packets (randoms, accuracy, signature, ids, latency, checksums), and
// transmit them.
636 static int handle_gen_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
638 struct task_gen *task = (struct task_gen *)tbase;
639 uint8_t out[MAX_PKT_BURST] = {0};
644 // If link is down, link_speed is 0
645 if (unlikely(task->link_speed == 0)) {
646 if (task->port && task->port->link_speed != 0) {
// Port speed is in Mbps; convert to bytes/sec (Mbps * 125000).
647 task->link_speed = task->port->link_speed * 125000L;
648 plog_info("\tPort %u: link speed is %ld Mbps\n",
649 (uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
654 task_gen_update_config(task);
// Budget exhausted: restart the token bucket (stops transmission cleanly).
656 if (task->pkt_count == 0) {
657 task_gen_reset_token_time(task);
660 if (!task->token_time.cfg.bpp)
663 token_time_update(&task->token_time, rte_rdtsc());
665 uint32_t would_send_bytes;
666 uint32_t send_bulk = task_gen_calc_send_bulk(task, &would_send_bytes);
670 task_gen_take_count(task, send_bulk);
671 task_gen_consume_tokens(task, would_send_bytes, send_bulk);
673 struct rte_mbuf **new_pkts = local_mbuf_refill_and_take(&task->local_mbuf, send_bulk);
674 if (new_pkts == NULL)
676 uint8_t *pkt_hdr[MAX_RING_BURST];
678 task_gen_load_and_prefetch(new_pkts, pkt_hdr, send_bulk);
679 task_gen_build_packets(task, new_pkts, pkt_hdr, send_bulk);
680 task_gen_apply_all_random_fields(task, pkt_hdr, send_bulk);
681 task_gen_apply_all_accur_pos(task, new_pkts, pkt_hdr, send_bulk);
682 task_gen_apply_all_sig(task, new_pkts, pkt_hdr, send_bulk);
683 task_gen_apply_all_unique_id(task, new_pkts, pkt_hdr, send_bulk);
685 uint64_t tsc_before_tx;
687 tsc_before_tx = task_gen_write_latency(task, pkt_hdr, send_bulk);
688 task_gen_checksum_packets(task, new_pkts, pkt_hdr, send_bulk);
689 ret = task->base.tx_pkt(&task->base, new_pkts, send_bulk, out);
690 task_gen_store_accuracy(task, send_bulk, tsc_before_tx);
// Seed every per-task random-field generator.
694 static void init_task_gen_seeds(struct task_gen *task)
696 for (size_t i = 0; i < sizeof(task->rand)/sizeof(task->rand[0]); ++i)
697 random_init_seed(&task->rand[i].state);
// Count the packets in an open pcap handle, then rewind the underlying
// file back to the first packet so the caller can read them again.
700 static uint32_t pcap_count_pkts(pcap_t *handle)
702 struct pcap_pkthdr header;
705 long pkt1_fpos = ftell(pcap_file(handle));
707 while ((buf = pcap_next(handle, &header))) {
710 int ret2 = fseek(pcap_file(handle), pkt1_fpos, SEEK_SET);
711 PROX_PANIC(ret2 != 0, "Failed to reset reading pcap file\n");
/* Return the rounded average of 'n' time stamps.
 *
 * Fix: guard n == 0. The caller (pcap_read_pkts) invokes this with
 * n_pkts - 1, which is 0 for a single-packet pcap and previously caused a
 * division by zero. Returns 0 for an empty input. */
static uint64_t avg_time_stamp(uint64_t *time_stamp, uint32_t n)
{
	uint64_t tot_inter_pkt = 0;

	if (n == 0)
		return 0;
	for (uint32_t i = 0; i < n; ++i)
		tot_inter_pkt += time_stamp[i];
	/* + n/2 rounds to nearest instead of truncating. */
	return (tot_inter_pkt + n / 2) / n;
}
// Read n_pkts packets from an open pcap handle into 'proto' templates,
// truncating to the template buffer size. When 'time_stamp' is non-NULL,
// convert each packet's capture time into tsc and post-process the array
// into inter-packet gaps (with the wrap-around gap set to the average).
724 static int pcap_read_pkts(pcap_t *handle, const char *file_name, uint32_t n_pkts, struct pkt_template *proto, uint64_t *time_stamp)
726 struct pcap_pkthdr header;
730 for (uint32_t i = 0; i < n_pkts; ++i) {
731 buf = pcap_next(handle, &header);
733 PROX_PANIC(buf == NULL, "Failed to read packet %d from pcap %s\n", i, file_name);
734 proto[i].len = header.len;
735 len = RTE_MIN(header.len, sizeof(proto[i].buf));
736 if (header.len > len)
737 plogx_warn("Packet truncated from %u to %zu bytes\n", header.len, len);
// Capture times are made relative to the first packet's time stamp.
740 static struct timeval beg;
746 tv = tv_diff(&beg, &header.ts);
747 tv_to_tsc(&tv, time_stamp + i);
749 rte_memcpy(proto[i].buf, buf, len);
// Convert absolute time stamps to deltas between consecutive packets.
752 if (time_stamp && n_pkts) {
753 for (uint32_t i = n_pkts - 1; i > 0; --i)
754 time_stamp[i] -= time_stamp[i - 1];
755 /* Since the handle function will loop the packets,
756 there is one time-stamp that is not provided by the
757 pcap file. This is the time between the last and
758 the first packet. This implementation takes the
759 average of the inter-packet times here. */
761 time_stamp[0] = avg_time_stamp(time_stamp + 1, n_pkts - 1);
// Validate one packet size against the Ethernet bounds (min: eth + ipv4
// headers; max: ETHER_MAX_LEN minus 4-byte FCS). Panics when do_panic is
// set, otherwise logs an error and returns non-zero.
767 static int check_pkt_size(struct task_gen *task, uint32_t pkt_size, int do_panic)
769 const uint16_t min_len = sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr);
770 const uint16_t max_len = ETHER_MAX_LEN - 4;
773 PROX_PANIC(pkt_size == 0, "Invalid packet size length (no packet defined?)\n");
774 PROX_PANIC(pkt_size > max_len, "pkt_size out of range (must be <= %u)\n", max_len);
775 PROX_PANIC(pkt_size < min_len, "pkt_size out of range (must be >= %u)\n", min_len);
// Non-panicking path: same three checks, reported via plog_err.
779 plog_err("Invalid packet size length (no packet defined?)\n");
782 if (pkt_size > max_len) {
783 plog_err("pkt_size out of range (must be <= %u)\n", max_len);
786 if (pkt_size < min_len) {
787 plog_err("pkt_size out of range (must be >= %u)\n", min_len);
// Validate every template's packet size; stop at the first failure.
794 static int check_all_pkt_size(struct task_gen *task, int do_panic)
797 for (uint32_t i = 0; i < task->n_pkts;++i) {
798 if ((rc = check_pkt_size(task, task->pkt_template[i].len, do_panic)) != 0)
// Verify that every configured write position (latency stamp, packet id,
// accuracy) fits inside a packet of 'pkt_size' bytes. Panics or logs and
// returns non-zero depending on do_panic.
804 static int check_fields_in_bounds(struct task_gen *task, uint32_t pkt_size, int do_panic)
806 if (task->lat_enabled) {
807 uint32_t pos_beg = task->lat_pos;
808 uint32_t pos_end = task->lat_pos + 3U;
811 PROX_PANIC(pkt_size <= pos_end, "Writing latency at %u-%u, but packet size is %u bytes\n",
812 pos_beg, pos_end, pkt_size);
813 else if (pkt_size <= pos_end) {
814 plog_err("Writing latency at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
818 if (task->packet_id_pos) {
819 uint32_t pos_beg = task->packet_id_pos;
820 uint32_t pos_end = task->packet_id_pos + 4U;
823 PROX_PANIC(pkt_size <= pos_end, "Writing packet at %u-%u, but packet size is %u bytes\n",
824 pos_beg, pos_end, pkt_size);
825 else if (pkt_size <= pos_end) {
826 plog_err("Writing packet at %u-%u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
830 if (task->accur_pos) {
831 uint32_t pos_beg = task->accur_pos;
832 uint32_t pos_end = task->accur_pos + 3U;
// NOTE(review): "%u%-u" below looks like a typo for "%u-%u" (matches the
// latency/packet-id messages above); cannot change strings in a doc-only
// edit — fix in a follow-up.
835 PROX_PANIC(pkt_size <= pos_end, "Writing accuracy at %u%-u, but packet size is %u bytes\n",
836 pos_beg, pos_end, pkt_size);
837 else if (pkt_size <= pos_end) {
838 plog_err("Writing accuracy at %u%-u, but packet size is %u bytes\n", pos_beg, pos_end, pkt_size);
// Re-derive the cached l2/l3 lengths of every template from its bytes.
845 static void task_gen_pkt_template_recalc_metadata(struct task_gen *task)
847 struct pkt_template *template;
849 for (size_t i = 0; i < task->n_pkts; ++i) {
850 template = &task->pkt_template[i];
851 parse_l2_l3_len(template->buf, &template->l2_len, &template->l3_len, template->len);
// Precompute IP/UDP/TCP checksums inside every template, and decide
// whether any runtime write (latency/accuracy/id past the L2 header) will
// invalidate them, in which case checksums must be redone per packet.
855 static void task_gen_pkt_template_recalc_checksum(struct task_gen *task)
857 struct pkt_template *template;
860 task->runtime_checksum_needed = 0;
861 for (size_t i = 0; i < task->n_pkts; ++i) {
862 template = &task->pkt_template[i];
// l2_len == 0 marks a template whose L2/L3 could not be parsed; skip it.
863 if (template->l2_len == 0)
865 ip = (struct ipv4_hdr *)(template->buf + template->l2_len);
867 ip->hdr_checksum = 0;
868 prox_ip_cksum_sw(ip);
869 uint32_t l4_len = rte_bswap16(ip->total_length) - template->l3_len;
871 if (ip->next_proto_id == IPPROTO_UDP) {
872 struct udp_hdr *udp = (struct udp_hdr *)(((uint8_t *)ip) + template->l3_len);
873 prox_udp_cksum_sw(udp, l4_len, ip->src_addr, ip->dst_addr);
874 } else if (ip->next_proto_id == IPPROTO_TCP) {
875 struct tcp_hdr *tcp = (struct tcp_hdr *)(((uint8_t *)ip) + template->l3_len);
876 prox_tcp_cksum_sw(tcp, l4_len, ip->src_addr, ip->dst_addr);
879 /* The current implementation avoids checksum
880 calculation by determining that at packet
881 construction time, no fields are applied that would
882 require a recalculation of the checksum. */
883 if (task->lat_enabled && task->lat_pos > template->l2_len)
884 task->runtime_checksum_needed = 1;
885 if (task->accur_pos > template->l2_len)
886 task->runtime_checksum_needed = 1;
887 if (task->packet_id_pos > template->l2_len)
888 task->runtime_checksum_needed = 1;
// Refresh both template metadata and precomputed checksums.
892 static void task_gen_pkt_template_recalc_all(struct task_gen *task)
894 task_gen_pkt_template_recalc_metadata(task);
895 task_gen_pkt_template_recalc_checksum(task);
// Restore each runtime template's length from its pristine original.
898 static void task_gen_reset_pkt_templates_len(struct task_gen *task)
900 struct pkt_template *src, *dst;
902 for (size_t i = 0; i < task->n_pkts; ++i) {
903 src = &task->pkt_template_orig[i];
904 dst = &task->pkt_template[i];
// Restore each runtime template's bytes from its pristine original.
909 static void task_gen_reset_pkt_templates_content(struct task_gen *task)
911 struct pkt_template *src, *dst;
913 for (size_t i = 0; i < task->n_pkts; ++i) {
914 src = &task->pkt_template_orig[i];
915 dst = &task->pkt_template[i];
916 memcpy(dst->buf, src->buf, dst->len);
// Full reset: lengths, contents, then recompute metadata and checksums.
920 static void task_gen_reset_pkt_templates(struct task_gen *task)
922 task_gen_reset_pkt_templates_len(task);
923 task_gen_reset_pkt_templates_content(task);
924 task_gen_pkt_template_recalc_all(task);
// Initialise the generator from a single inline packet definition given in
// the task config: allocate the runtime and original template arrays on
// the local NUMA socket, copy the inline bytes in, and validate the size
// and configured field offsets (panicking on error).
927 static void task_init_gen_load_pkt_inline(struct task_gen *task, struct task_args *targ)
929 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
// Clamp the configured size to the template buffer capacity.
931 if (targ->pkt_size > sizeof(task->pkt_template[0].buf))
932 targ->pkt_size = sizeof(task->pkt_template[0].buf);
935 size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
936 task->pkt_template = prox_zmalloc(mem_size, socket_id);
937 task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
939 PROX_PANIC(task->pkt_template == NULL ||
940 task->pkt_template_orig == NULL,
941 "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
943 rte_memcpy(task->pkt_template_orig[0].buf, targ->pkt_inline, targ->pkt_size);
944 task->pkt_template_orig[0].len = targ->pkt_size;
945 task_gen_reset_pkt_templates(task);
946 check_all_pkt_size(task, 1);
947 check_fields_in_bounds(task, task->pkt_template[0].len, 1);
// Initialise the generator from a pcap file: count its packets (capped by
// the configured n_pkts and MAX_TEMPLATE_INDEX), allocate the template
// arrays on the local NUMA socket and load the packets into them.
950 static void task_init_gen_load_pcap(struct task_gen *task, struct task_args *targ)
952 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
953 char err[PCAP_ERRBUF_SIZE];
954 pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
955 PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
957 task->n_pkts = pcap_count_pkts(handle);
958 plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
961 task->n_pkts = RTE_MIN(task->n_pkts, targ->n_pkts);
962 PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
963 plogx_info("Loading %u packets from pcap\n", task->n_pkts);
964 size_t mem_size = task->n_pkts * sizeof(*task->pkt_template);
965 task->pkt_template = prox_zmalloc(mem_size, socket_id);
966 task->pkt_template_orig = prox_zmalloc(mem_size, socket_id);
967 PROX_PANIC(task->pkt_template == NULL ||
968 task->pkt_template_orig == NULL,
969 "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
// No time stamps needed here: pacing is rate-based, not pcap-based.
971 pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->pkt_template_orig, NULL);
973 task_gen_reset_pkt_templates(task);
// Create the mbuf mempool backing this generator's local mbuf cache.
// NOTE(review): 'name' is a static buffer shared across calls; the lines
// between 980 and 983 are not visible here — presumably they make the name
// unique per task. Verify against the full source.
976 static struct rte_mempool *task_gen_create_mempool(struct task_args *targ)
978 static char name[] = "gen_pool";
979 struct rte_mempool *ret;
980 const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
983 ret = rte_mempool_create(name, targ->nb_mbuf - 1, MBUF_SIZE,
984 targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
985 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
987 PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
988 sock_id, targ->nb_mbuf - 1);
// Public API: set the number of packets left to generate ((uint32_t)-1
// means unlimited).
992 void task_gen_set_pkt_count(struct task_base *tbase, uint32_t count)
994 struct task_gen *task = (struct task_gen *)tbase;
996 task->pkt_count = count;
// Public API: resize the (single) packet template after validating the new
// size and the configured field positions; returns non-zero on rejection.
999 int task_gen_set_pkt_size(struct task_base *tbase, uint32_t pkt_size)
1001 struct task_gen *task = (struct task_gen *)tbase;
1004 if ((rc = check_pkt_size(task, pkt_size, 0)) != 0)
1006 if ((rc = check_fields_in_bounds(task, pkt_size, 0)) != 0)
1008 task->pkt_template[0].len = pkt_size;
// Public API: request a new transmit rate in bytes per second; picked up
// by task_gen_update_config on the next burst.
1012 void task_gen_set_rate(struct task_base *tbase, uint64_t bps)
1014 struct task_gen *task = (struct task_gen *)tbase;
1016 task->new_rate_bps = bps;
// Public API: clear all configured random fields.
1019 void task_gen_reset_randoms(struct task_base *tbase)
1021 struct task_gen *task = (struct task_gen *)tbase;
1023 for (uint32_t i = 0; i < task->n_rands; ++i) {
1024 task->rand[i].rand_mask = 0;
1025 task->rand[i].fixed_bits = 0;
1026 task->rand[i].rand_offset = 0;
// Public API: write a fixed big-endian value of 'len' bytes at 'offset'
// into every template, then refresh metadata and checksums.
1031 int task_gen_set_value(struct task_base *tbase, uint32_t value, uint32_t offset, uint32_t len)
1033 struct task_gen *task = (struct task_gen *)tbase;
1035 for (size_t i = 0; i < task->n_pkts; ++i) {
1036 uint32_t to_write = rte_cpu_to_be_32(value) >> ((4 - len) * 8);
1037 uint8_t *dst = task->pkt_template[i].buf;
1039 rte_memcpy(dst + offset, &to_write, len);
1042 task_gen_pkt_template_recalc_all(task);
// Public API: discard all runtime template edits, restoring the originals.
1047 void task_gen_reset_values(struct task_base *tbase)
1049 struct task_gen *task = (struct task_gen *)tbase;
1051 task_gen_reset_pkt_templates_content(task);
// Public API: number of random fields currently configured.
1054 uint32_t task_gen_get_n_randoms(struct task_base *tbase)
1056 struct task_gen *task = (struct task_gen *)tbase;
1058 return task->n_rands;
// One-time init of the pcap replay task: create the mbuf pool, open and
// count the pcap, allocate one contiguous area for the templates plus the
// per-packet tsc gaps, and load both from the file.
1061 static void init_task_gen_pcap(struct task_base *tbase, struct task_args *targ)
1063 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1064 const uint32_t sockid = rte_lcore_to_socket_id(targ->lconf->id);
1066 task->loop = targ->loop;
1068 task->hz = rte_get_tsc_hz();
1070 task->local_mbuf.mempool = task_gen_create_mempool(targ);
1072 PROX_PANIC(!strcmp(targ->pcap_file, ""), "No pcap file defined\n");
1074 char err[PCAP_ERRBUF_SIZE];
1075 pcap_t *handle = pcap_open_offline(targ->pcap_file, err);
1076 PROX_PANIC(handle == NULL, "Failed to open PCAP file: %s\n", err);
1078 task->n_pkts = pcap_count_pkts(handle);
1079 plogx_info("%u packets in pcap file '%s'\n", task->n_pkts, targ->pcap_file);
// Cap the number of loaded packets at the configured limit.
1082 plogx_info("Configured to load %u packets\n", targ->n_pkts);
1083 if (task->n_pkts > targ->n_pkts)
1084 task->n_pkts = targ->n_pkts;
1086 PROX_PANIC(task->n_pkts > MAX_TEMPLATE_INDEX, "Too many packets specified in pcap - increase MAX_TEMPLATE_INDEX\n");
1088 plogx_info("Loading %u packets from pcap\n", task->n_pkts);
// Single allocation holds the template array followed by the tsc array.
1090 size_t mem_size = task->n_pkts * (sizeof(*task->proto) + sizeof(*task->proto_tsc));
1091 uint8_t *mem = prox_zmalloc(mem_size, sockid);
1093 PROX_PANIC(mem == NULL, "Failed to allocate %lu bytes (in huge pages) for pcap file\n", mem_size);
1094 task->proto = (struct pkt_template *) mem;
1095 task->proto_tsc = (uint64_t *)(mem + task->n_pkts * sizeof(*task->proto));
1097 pcap_read_pkts(handle, targ->pcap_file, task->n_pkts, task->proto, task->proto_tsc);
/* Look up the index of the random-field descriptor configured at byte
 * offset 'offset'.
 * NOTE(review): the matching "return i;" and the not-found fallback are on
 * lines stripped from this excerpt; the caller (task_gen_add_rand, below)
 * treats UINT32_MAX as "no existing random at this offset" — confirm the
 * tail returns UINT32_MAX against the full file. */
1101 static int task_gen_find_random_with_offset(struct task_gen *task, uint32_t offset)
1103 for (uint32_t i = 0; i < task->n_rands; ++i) {
1104 if (task->rand[i].rand_offset == offset) {
/* Configure a pseudo-random field in the generated packets.
 * rand_str is parsed into a (mask, fixed-bits, length) triple; 'offset' is
 * the byte offset in the packet where the random value is written.  If a
 * random already exists at that offset it is overwritten in place, otherwise
 * a new slot at task->n_rands is filled in.
 * NOTE(review): error returns after the plog_err calls, the "else" branch
 * structure, the n_rands increment and the final return are on lines
 * stripped from this excerpt. */
1112 int task_gen_add_rand(struct task_base *tbase, const char *rand_str, uint32_t offset, uint32_t rand_id)
1114 struct task_gen *task = (struct task_gen *)tbase;
1115 uint32_t existing_rand;
/* Hard cap of 64 random descriptors; only enforced when the caller did not
 * name an explicit slot to overwrite. */
1117 if (rand_id == UINT32_MAX && task->n_rands == 64) {
1118 plog_err("Too many randoms\n");
1121 uint32_t mask, fixed, len;
1123 if (parse_random_str(&mask, &fixed, &len, rand_str)) {
1124 plog_err("%s\n", get_parse_err());
/* Random fields change payload at runtime, so checksums must be recomputed
 * per packet from now on. */
1127 task->runtime_checksum_needed = 1;
1129 existing_rand = task_gen_find_random_with_offset(task, offset);
/* Overwrite the existing descriptor at this offset rather than adding a
 * duplicate. */
1130 if (existing_rand != UINT32_MAX) {
/* NOTE(review): offset and len are uint32_t but printed with %d — %u would
 * be the matching conversion specifier. */
1131 plog_warn("Random at offset %d already set => overwriting len = %d %s\n", offset, len, rand_str);
1132 rand_id = existing_rand;
1133 task->rand[rand_id].rand_len = len;
1134 task->rand[rand_id].rand_offset = offset;
1135 task->rand[rand_id].rand_mask = mask;
1136 task->rand[rand_id].fixed_bits = fixed;
/* New descriptor appended at the first free slot. */
1140 task->rand[task->n_rands].rand_len = len;
1141 task->rand[task->n_rands].rand_offset = offset;
1142 task->rand[task->n_rands].rand_mask = mask;
1143 task->rand[task->n_rands].fixed_bits = fixed;
/* Start hook for the inline/pcap-loaded generator ("gen" mode): reset the
 * transmit queue index and token bucket, register IPs with the control plane
 * when an L3 master task is present, and snapshot the port link speed.
 * NOTE(review): some lines are stripped from this excerpt — the link-speed
 * code is presumably guarded by "if (task->port)" and the second plog_info
 * is presumably the "else" branch; confirm against the full file. */
1149 static void start(struct task_base *tbase)
1151 struct task_gen *task = (struct task_gen *)tbase;
1152 task->pkt_queue_index = 0;
1154 task_gen_reset_token_time(task);
/* L3 sub-mode: let the master task answer/emit ARP for our source IPs. */
1155 if (tbase->l3.tmaster) {
1156 register_all_ip_to_ctrl_plane(task);
1159 // task->port->link_speed reports the link speed in Mbps e.g. 40k for a 40 Gbps NIC.
1160 // task->link_speed reports link speed in Bytes per sec.
1161 // It can be 0 if link is down, and must hence be updated in fast path.
1162 task->link_speed = task->port->link_speed * 125000L;
1163 if (task->link_speed)
1164 plog_info("\tPort %u: link speed is %ld Mbps\n",
1165 (uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
1167 plog_info("\tPort %u: link speed is %ld Mbps - link might be down\n",
1168 (uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
/* The text below is part of an original block comment whose delimiters were
 * stripped in this excerpt — it is a TODO note, not executable code. */
1171 Handle the case when two tasks transmit to the same port
1172 and one of them is stopped. In that case ARP (requests or replies)
1173 might not be sent. Master will have to keep a list of rings.
1174 stop will have to de-register IP from ctrl plane.
1175 un-registration will remove the ring. when having more than
1176 one active rings, master can always use the first one
1180 static void start_pcap(struct task_base *tbase)
1182 struct task_gen_pcap *task = (struct task_gen_pcap *)tbase;
1183 /* When we start, the first packet is sent immediately. */
1184 task->last_tsc = rte_rdtsc() - task->proto_tsc[0];
1188 static void init_task_gen_early(struct task_args *targ)
1190 uint8_t *generator_count = prox_sh_find_system("generator_count");
1192 if (generator_count == NULL) {
1193 generator_count = prox_zmalloc(sizeof(*generator_count), rte_lcore_to_socket_id(targ->lconf->id));
1194 PROX_PANIC(generator_count == NULL, "Failed to allocate generator count\n");
1195 prox_sh_add_system("generator_count", generator_count);
1197 targ->generator_id = *generator_count;
1198 (*generator_count)++;
/* Init hook shared by the "gen" and "gen l3" sub-modes: create the mbuf
 * mempool, record the configured field offsets (latency, accuracy,
 * signature, packet id), set up the rate token bucket, clamp bulk sizes,
 * load packet templates (inline definition or pcap), optionally rewrite the
 * source MAC from the TX port, and install configured random fields.
 * NOTE(review): several lines (else keywords, closing braces, and an
 * apparent "if (port)" guard near the end) are stripped from this excerpt;
 * comments describe only what is visible. */
1201 static void init_task_gen(struct task_base *tbase, struct task_args *targ)
1203 struct task_gen *task = (struct task_gen *)tbase;
1205 task->packet_id_pos = targ->packet_id_pos;
1207 task->local_mbuf.mempool = task_gen_create_mempool(targ);
1208 PROX_PANIC(task->local_mbuf.mempool == NULL, "Failed to create mempool\n");
1210 task->hz = rte_get_tsc_hz();
/* Byte offsets within generated packets where timestamps / ids are written. */
1211 task->lat_pos = targ->lat_pos;
1212 task->accur_pos = targ->accur_pos;
1213 task->sig_pos = targ->sig_pos;
1214 task->sig = targ->sig;
/* Requested transmit rate; applied in the fast path. */
1215 task->new_rate_bps = targ->rate_bps;
1218 * For tokens, use 10 Gbps as base rate
1219 * Scripts can then use speed command, with speed=100 as 10 Gbps and speed=400 as 40 Gbps
1220 * Script can query prox "port info" command to find out the port link speed to know
1221 * at which rate to start. Note that virtio running on OVS returns 10 Gbps, so a script has
1222 * probably also to check the driver (as returned by the same "port info" command.
/* 1250000000 bytes/sec == 10 Gbps base rate for the token bucket. */
1224 struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
1225 token_time_init(&task->token_time, &tt_cfg);
1227 init_task_gen_seeds(task);
/* Clamp bulk sizes to [1, 64]; a zero max is replaced by the default 64. */
1229 task->min_bulk_size = targ->min_bulk_size;
1230 task->max_bulk_size = targ->max_bulk_size;
1231 if (task->min_bulk_size < 1)
1232 task->min_bulk_size = 1;
1233 if (task->max_bulk_size < 1)
1234 task->max_bulk_size = 64;
1235 PROX_PANIC(task->max_bulk_size > 64, "max_bulk_size higher than 64\n");
1236 PROX_PANIC(task->max_bulk_size < task->min_bulk_size, "max_bulk_size must be > than min_bulk_size\n");
/* -1 == unlimited packet count. */
1238 task->pkt_count = -1;
1239 task->lat_enabled = targ->lat_enabled;
1240 task->runtime_flags = targ->runtime_flags;
1241 PROX_PANIC((task->lat_pos || task->accur_pos) && !task->lat_enabled, "lat not enabled by lat pos or accur pos configured\n");
1243 task->generator_id = targ->generator_id;
1244 plog_info("\tGenerator id = %d\n", task->generator_id);
/* Unknown until start(); updated from the port in the fast path. */
1245 task->link_speed = UINT64_MAX;
/* Template source: inline config definition, or a pcap file.
 * (the pcap branch below is presumably the stripped "else" arm) */
1247 if (!strcmp(targ->pcap_file, "")) {
1248 plog_info("\tUsing inline definition of a packet\n");
1249 task_init_gen_load_pkt_inline(task, targ);
1251 plog_info("Loading from pcap %s\n", targ->pcap_file);
1252 task_init_gen_load_pcap(task, targ);
/* Unless the config asks to keep it, overwrite the Ethernet source MAC
 * (bytes 6..11 of each template) with the TX port's address. */
1255 if ((targ->flags & DSF_KEEP_SRC_MAC) == 0 && (targ->nb_txrings || targ->nb_txports)) {
1256 uint8_t *src_addr = prox_port_cfg[tbase->tx_params_hw.tx_port_queue->port].eth_addr.addr_bytes;
1257 for (uint32_t i = 0; i < task->n_pkts; ++i) {
1258 rte_memcpy(&task->pkt_template[i].buf[6], src_addr, 6);
1261 memcpy(&task->src_mac, &prox_port_cfg[task->base.tx_params_hw.tx_port_queue->port].eth_addr, sizeof(struct ether_addr));
/* Install all random fields declared in the config. */
1262 for (uint32_t i = 0; i < targ->n_rand_str; ++i) {
1263 PROX_PANIC(task_gen_add_rand(tbase, targ->rand_str[i], targ->rand_offset[i], UINT32_MAX),
1264 "Failed to add random\n");
/* Record whether the TX port can offload checksum computation.
 * (presumably guarded by a stripped "if (port)" — TODO confirm) */
1267 struct prox_port_cfg *port = find_reachable_port(targ);
1269 task->cksum_offload = port->capabilities.tx_offload_cksum;
/* Task registration for the base "gen" mode.
 * NOTE(review): the two .flag_features initializers below are alternatives
 * selected by a compile-time conditional (#if/#else/#endif) whose directive
 * lines were stripped from this excerpt — confirm against the full file. */
1274 static struct task_init task_init_gen = {
1276 .init = init_task_gen,
1277 .handle = handle_gen_bulk,
1279 .early_init = init_task_gen_early,
1281 // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
1282 // vector mode is used by DPDK, resulting (theoretically) in higher performance.
1283 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
1285 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1287 .size = sizeof(struct task_gen)
/* Task registration for the "gen l3" sub-mode — same init/handle as the
 * base mode; the "l3" sub-mode string enables ARP/control-plane handling.
 * NOTE(review): as above, the two .flag_features initializers are
 * alternatives under a stripped compile-time conditional. */
1290 static struct task_init task_init_gen_l3 = {
1292 .sub_mode_str = "l3",
1293 .init = init_task_gen,
1294 .handle = handle_gen_bulk,
1296 .early_init = init_task_gen_early,
1298 // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
1299 // vector mode is used by DPDK, resulting (theoretically) in higher performance.
1300 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
1302 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1304 .size = sizeof(struct task_gen)
/* Task registration for the "gen pcap" sub-mode: replays a pcap file with
 * its recorded timing (dedicated init/handle/start hooks, pcap task struct).
 * NOTE(review): the two .flag_features initializers are alternatives under a
 * stripped compile-time conditional, as in the other registrations above. */
1307 static struct task_init task_init_gen_pcap = {
1309 .sub_mode_str = "pcap",
1310 .init = init_task_gen_pcap,
1311 .handle = handle_gen_pcap_bulk,
1312 .start = start_pcap,
1313 .early_init = init_task_gen_early,
1315 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
1317 .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
1319 .size = sizeof(struct task_gen_pcap)
1322 __attribute__((constructor)) static void reg_task_gen(void)
1324 reg_task(&task_init_gen);
1325 reg_task(&task_init_gen_l3);
1326 reg_task(&task_init_gen_pcap);