When doing latency measurements, PROX takes into account the
generation or reception of a bulk of packets. For instance, if
PROX receives 4 packets at time T, it knows that the first
packet was received by the NIC before T (earlier by at least the time needed
to receive the other 3 packets, as they arrive at maximum link speed at best).
So the latency data is decreased by the minimum time to receive those
3 packets.
For this, PROX was using a default link speed of 10 Gbps. This is wrong
for 1 Gbps and 40 Gbps networks. On 40 Gbps networks, for instance, it
caused issues because PROX extrapolated too far back, resulting in either
too low latencies or negative numbers (visible as very high latencies).
Change-Id: I4e0f02e8383dd8d168ac50ecae37a05510ad08bc
Signed-off-by: Xavier Simonart <xavier.simonart@intel.com>
if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) {
plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id);
}
if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) {
plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id);
}
- else if (speed > 400.0f || speed < 0.0f) {
- plog_err("Speed out of range (must be betweeen 0%% and 100%%)\n");
+ else if (speed > 1000.0f || speed < 0.0f) { // Up to 100 Gbps
+ plog_err("Speed out of range (must be betweeen 0%% and 1000%%)\n");
}
else {
struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id];
}
else {
struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id];
if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) {
plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id);
}
if ((!task_is_mode_and_submode(lcore_id, task_id, "gen", "")) && (!task_is_mode_and_submode(lcore_id, task_id, "gen", "l3"))) {
plog_err("Core %u task %u is not generating packets\n", lcore_id, task_id);
}
- else if (bps > 1250000000) {
- plog_err("Speed out of range (must be <= 1250000000)\n");
+ else if (bps > 12500000000) { // Up to 100Gbps
+ plog_err("Speed out of range (must be <= 12500000000)\n");
}
else {
struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id];
}
else {
struct task_base *tbase = lcore_cfg[lcore_id].tasks_all[task_id];
struct ether_addr src_mac;
uint8_t flags;
uint8_t cksum_offload;
struct ether_addr src_mac;
uint8_t flags;
uint8_t cksum_offload;
+ struct prox_port_cfg *port;
} __rte_cache_aligned;
static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
} __rte_cache_aligned;
static inline uint8_t ipv4_get_hdr_len(struct ipv4_hdr *ip)
if (tbase->l3.tmaster) {
register_all_ip_to_ctrl_plane(task);
}
if (tbase->l3.tmaster) {
register_all_ip_to_ctrl_plane(task);
}
+ if (task->port) {
+ // task->port->link_speed reports the link speed in Mbps, e.g. 40000 for a 40 Gbps NIC.
+ // task->link_speed holds the link speed in bytes per second (1 Mbps = 125000 bytes/s).
+ task->link_speed = task->port->link_speed * 125000L;
+ plog_info("\tGenerating at %ld Mbps\n", 8 * task->link_speed / 1000000);
+ }
/* TODO
Handle the case when two tasks transmit to the same port
and one of them is stopped. In that case ARP (requests or replies)
/* TODO
Handle the case when two tasks transmit to the same port
and one of them is stopped. In that case ARP (requests or replies)
task->sig = targ->sig;
task->new_rate_bps = targ->rate_bps;
task->sig = targ->sig;
task->new_rate_bps = targ->rate_bps;
+ /*
+ * For tokens, use 10 Gbps as the base rate.
+ * Scripts can then use the speed command, with speed=100 meaning 10 Gbps and speed=400 meaning 40 Gbps.
+ * A script can query the prox "port info" command to find out the port link speed and so know
+ * at which rate to start. Note that virtio running on OVS returns 10 Gbps, so a script
+ * probably also has to check the driver (as returned by the same "port info" command).
+ */
struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
struct token_time_cfg tt_cfg = token_time_cfg_create(1250000000, rte_get_tsc_hz(), -1);
token_time_init(&task->token_time, &tt_cfg);
token_time_init(&task->token_time, &tt_cfg);
init_task_gen_seeds(task);
task->min_bulk_size = targ->min_bulk_size;
init_task_gen_seeds(task);
task->min_bulk_size = targ->min_bulk_size;
task->generator_id = targ->generator_id;
task->link_speed = UINT64_MAX;
task->generator_id = targ->generator_id;
task->link_speed = UINT64_MAX;
- if (targ->nb_txrings == 0 && targ->nb_txports == 1)
- task->link_speed = 1250000000;
if (!strcmp(targ->pcap_file, "")) {
plog_info("\tUsing inline definition of a packet\n");
if (!strcmp(targ->pcap_file, "")) {
plog_info("\tUsing inline definition of a packet\n");
struct prox_port_cfg *port = find_reachable_port(targ);
if (port) {
task->cksum_offload = port->capabilities.tx_offload_cksum;
struct prox_port_cfg *port = find_reachable_port(targ);
if (port) {
task->cksum_offload = port->capabilities.tx_offload_cksum;
#include "quit.h"
#include "eld.h"
#include "prox_shared.h"
#include "quit.h"
#include "eld.h"
#include "prox_shared.h"
+#include "prox_port_cfg.h"
#define DEFAULT_BUCKET_SIZE 10
#define DEFAULT_BUCKET_SIZE 10
uint32_t generator_count;
struct early_loss_detect *eld;
struct rx_pkt_meta_data *rx_pkt_meta;
uint32_t generator_count;
struct early_loss_detect *eld;
struct rx_pkt_meta_data *rx_pkt_meta;
FILE *fp_rx;
FILE *fp_tx;
FILE *fp_rx;
FILE *fp_tx;
+ struct prox_port_cfg *port;
};
static uint32_t abs_diff(uint32_t a, uint32_t b)
};
static uint32_t abs_diff(uint32_t a, uint32_t b)
return early_loss_detect_add(eld, packet_index);
}
return early_loss_detect_add(eld, packet_index);
}
-static uint64_t tsc_extrapolate_backward(uint64_t tsc_from, uint64_t bytes, uint64_t tsc_minimum)
+static uint64_t tsc_extrapolate_backward(uint64_t link_speed, uint64_t tsc_from, uint64_t bytes, uint64_t tsc_minimum)
- uint64_t tsc = tsc_from - rte_get_tsc_hz()*bytes/1250000000;
+ uint64_t tsc = tsc_from - (rte_get_tsc_hz()*bytes)/link_speed;
if (likely(tsc > tsc_minimum))
return tsc;
else
if (likely(tsc > tsc_minimum))
return tsc;
else
bytes_total_in_bulk += mbuf_wire_size(mbufs[flipped]);
}
bytes_total_in_bulk += mbuf_wire_size(mbufs[flipped]);
}
- pkt_rx_time = tsc_extrapolate_backward(rx_tsc, task->rx_pkt_meta[0].bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
+ pkt_rx_time = tsc_extrapolate_backward(task->link_speed, rx_tsc, task->rx_pkt_meta[0].bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
if ((uint32_t)((task->begin >> LATENCY_ACCURACY)) > pkt_rx_time) {
// Extrapolation went up to BEFORE begin => packets were stuck in the NIC but we were not seeing them
rx_time_err = pkt_rx_time - (uint32_t)(task->last_pkts_tsc >> LATENCY_ACCURACY);
if ((uint32_t)((task->begin >> LATENCY_ACCURACY)) > pkt_rx_time) {
// Extrapolation went up to BEFORE begin => packets were stuck in the NIC but we were not seeing them
rx_time_err = pkt_rx_time - (uint32_t)(task->last_pkts_tsc >> LATENCY_ACCURACY);
struct rx_pkt_meta_data *rx_pkt_meta = &task->rx_pkt_meta[j];
uint8_t *hdr = rx_pkt_meta->hdr;
struct rx_pkt_meta_data *rx_pkt_meta = &task->rx_pkt_meta[j];
uint8_t *hdr = rx_pkt_meta->hdr;
- pkt_rx_time = tsc_extrapolate_backward(rx_tsc, rx_pkt_meta->bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
+ pkt_rx_time = tsc_extrapolate_backward(task->link_speed, rx_tsc, rx_pkt_meta->bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
pkt_tx_time = rx_pkt_meta->pkt_tx_time;
if (task->unique_id_pos) {
pkt_tx_time = rx_pkt_meta->pkt_tx_time;
if (task->unique_id_pos) {
task->limit = nsec_to_tsc(accuracy_limit_nsec);
}
task->limit = nsec_to_tsc(accuracy_limit_nsec);
}
+static void lat_start(struct task_base *tbase)
+{
+ struct task_lat *task = (struct task_lat *)tbase;
+
+ if (task->port) {
+ // task->port->link_speed reports the link speed in Mbps, e.g. 40000 for a 40 Gbps NIC.
+ // task->link_speed holds the link speed in bytes per second (1 Mbps = 125000 bytes/s).
+ task->link_speed = task->port->link_speed * 125000L;
+ plog_info("\tReceiving at %ld Mbps\n", 8 * task->link_speed / 1000000);
+ }
+}
+
static void init_task_lat(struct task_base *tbase, struct task_args *targ)
{
struct task_lat *task = (struct task_lat *)tbase;
static void init_task_lat(struct task_base *tbase, struct task_args *targ)
{
struct task_lat *task = (struct task_lat *)tbase;
task_lat_set_accuracy_limit(task, targ->accuracy_limit_nsec);
task->rx_pkt_meta = prox_zmalloc(MAX_RX_PKT_ALL * sizeof(*task->rx_pkt_meta), socket_id);
PROX_PANIC(task->rx_pkt_meta == NULL, "unable to allocate memory to store RX packet meta data");
task_lat_set_accuracy_limit(task, targ->accuracy_limit_nsec);
task->rx_pkt_meta = prox_zmalloc(MAX_RX_PKT_ALL * sizeof(*task->rx_pkt_meta), socket_id);
PROX_PANIC(task->rx_pkt_meta == NULL, "unable to allocate memory to store RX packet meta data");
+
+ task->link_speed = UINT64_MAX;
+ if (targ->nb_rxports) {
+ // The task->port structure is only used when starting handle_lat, to get the link_speed.
+ // link_speed cannot be queried at init, as the port has not been initialized yet.
+ struct prox_port_cfg *port = &prox_port_cfg[targ->rx_port_queue[0].port];
+ task->port = port;
+ }
}
static struct task_init task_init_lat = {
.mode_str = "lat",
.init = init_task_lat,
.handle = handle_lat_bulk,
}
static struct task_init task_init_lat = {
.mode_str = "lat",
.init = init_task_lat,
.handle = handle_lat_bulk,
.stop = lat_stop,
.flag_features = TASK_FEATURE_TSC_RX | TASK_FEATURE_RX_ALL | TASK_FEATURE_ZERO_RX | TASK_FEATURE_NEVER_DISCARDS,
.size = sizeof(struct task_lat)
.stop = lat_stop,
.flag_features = TASK_FEATURE_TSC_RX | TASK_FEATURE_RX_ALL | TASK_FEATURE_ZERO_RX | TASK_FEATURE_NEVER_DISCARDS,
.size = sizeof(struct task_lat)