/*
-// Copyright (c) 2010-2017 Intel Corporation
+// Copyright (c) 2010-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
#include "prox_shared.h"
#include "prox_port_cfg.h"
-#define DEFAULT_BUCKET_SIZE 10
-#define ACCURACY_BUFFER_SIZE 64
+#define DEFAULT_BUCKET_SIZE 11
+#define ACCURACY_BUFFER_SIZE (2 * ACCURACY_WINDOW)
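+// The TX accuracy for packet N only arrives ACCURACY_WINDOW packets later, so delayed
+// latency entries must stay buffered at least that long; twice the window leaves headroom.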
struct lat_info {
uint32_t rx_packet_index;
uint32_t packet_id;
uint8_t generator_id;
uint64_t pkt_rx_time;
- uint64_t pkt_tx_time;
+ uint64_t pkt_tx_time; // Time written into packets by gen. Unit is TSC >> LATENCY_ACCURACY
uint64_t rx_time_err;
};
uint32_t bytes_after_in_bulk;
};
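+// One recorded loss event: the packet id at which missing packets were detected and how
+// many were missing; buffered entries are written to the loss file when the task stops.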
+struct loss_buffer {
+ uint32_t packet_id;
+ uint32_t n;
+};
+
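+// Per-flow state: last packet id observed on the flow, used to detect per-flow reordering.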
+struct flows {
+ uint32_t packet_id;
+};
+
struct task_lat {
struct task_base base;
uint64_t limit;
uint16_t min_pkt_len;
struct early_loss_detect *eld;
struct rx_pkt_meta_data *rx_pkt_meta;
- uint64_t link_speed;
// Following fields are only used when starting or stopping, not in general runtime
uint64_t *prev_tx_packet_index;
+ FILE *fp_loss;
FILE *fp_rx;
FILE *fp_tx;
struct prox_port_cfg *port;
+ uint64_t *bytes_to_tsc;
+ uint64_t *previous_packet;
+ uint32_t loss_buffer_size;
+ struct loss_buffer *loss_buffer;
+ uint32_t loss_id;
+ uint32_t packet_id_in_flow_pos;
+ int32_t flow_id_pos;
+ uint32_t flow_count;
+ struct flows *flows;
};
/* This function calculates the difference between rx_time and tx_time
* Both values are uint32_t (see handle_lat_bulk)
return rx_time - tx_time;
}
+uint32_t task_lat_get_latency_bucket_size(struct task_lat *task)
+{
+ return task->lat_test->bucket_size;
+}
+
struct lat_test *task_lat_get_latency_meassurement(struct task_lat *task)
{
if (task->use_lt == task->using_lt)
uint64_t rx_tsc = lat_info_get_rx_tsc(lat_info);
uint64_t tx_tsc = lat_info_get_tx_tsc(lat_info);
- /* Packet n + ACCURACY_BUFFER_SIZE delivers the TX error for packet n,
- hence the last ACCURACY_BUFFER_SIZE packets do no have TX error. */
- if (i + ACCURACY_BUFFER_SIZE >= task->latency_buffer_idx) {
+ /* Packet n + ACCURACY_WINDOW delivers the TX error for packet n,
+ hence the last ACCURACY_WINDOW packets do not have a TX error. */
+ if (i + ACCURACY_WINDOW >= task->latency_buffer_idx) {
tx_err_tsc = 0;
}
if (task->unique_id_pos) {
task_lat_count_remaining_lost_packets(task);
task_lat_reset_eld(task);
+ memset(task->previous_packet, 0, sizeof(task->previous_packet[0]) * task->generator_count);
+ }
+ if (task->loss_id && task->fp_loss) {
+ for (uint32_t i = 0; i < task->loss_id; i++) {
+ fprintf(task->fp_loss, "packet %u: %u\n", task->loss_buffer[i].packet_id, task->loss_buffer[i].n);
+ }
}
+ task->lat_test->lost_packets = 0;
if (task->latency_buffer)
lat_write_latency_to_file(task);
}
return early_loss_detect_add(eld, packet_id);
}
-static uint64_t tsc_extrapolate_backward(uint64_t link_speed, uint64_t tsc_from, uint64_t bytes, uint64_t tsc_minimum)
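+/* If the early-loss-detect slot for this packet id still holds the same sequence value,
+   the same packet id was already seen in this window: count it as a duplicate. */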
+static void lat_test_check_duplicate(struct task_lat *task, struct lat_test *lat_test, uint32_t packet_id, uint8_t generator_id)
{
- uint64_t tsc = tsc_from - (rte_get_tsc_hz()*bytes)/link_speed;
+ struct early_loss_detect *eld = &task->eld[generator_id];
+ uint32_t old_queue_id, queue_pos;
+
+ queue_pos = packet_id & PACKET_QUEUE_MASK;
+ old_queue_id = eld->entries[queue_pos];
+ if ((packet_id >> PACKET_QUEUE_BITS) == old_queue_id)
+ lat_test->duplicate++;
+}
+
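+/* Estimate when a packet was actually received by walking back from tsc_from by the time
+   'bytes' take on the wire (looked up in bytes_to_tsc), clamped to tsc_minimum. */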
+static uint64_t tsc_extrapolate_backward(struct task_lat *task, uint64_t tsc_from, uint64_t bytes, uint64_t tsc_minimum)
+{
+#ifdef NO_LAT_EXTRAPOLATION
+ uint64_t tsc = tsc_from;
+#else
+ uint64_t tsc = tsc_from - task->bytes_to_tsc[bytes];
+#endif
if (likely(tsc > tsc_minimum))
return tsc;
else
uint64_t bucket_id = (lat_tsc >> lat_test->bucket_size);
size_t bucket_count = sizeof(lat_test->buckets)/sizeof(lat_test->buckets[0]);
- bucket_id = bucket_id < bucket_count? bucket_id : bucket_count;
+ bucket_id = bucket_id < bucket_count? bucket_id : (bucket_count - 1);
lat_test->buckets[bucket_id]++;
}
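+/* Reordering checks: a packet id lower than the last one seen is counted as mis-ordered,
+   and 'extent' accumulates how far behind it arrived. The first variant tracks this per
+   flow, the second per generator. */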
+static void lat_test_check_flow_ordering(struct task_lat *task, struct lat_test *lat_test, int32_t flow_id, uint32_t packet_id)
+{
+ if (packet_id < task->flows[flow_id].packet_id) {
+ lat_test->mis_ordered++;
+ lat_test->extent += task->flows[flow_id].packet_id - packet_id;
+ }
+ task->flows[flow_id].packet_id = packet_id;
+}
+
+static void lat_test_check_ordering(struct task_lat *task, struct lat_test *lat_test, uint32_t packet_id, uint8_t generator_id)
+{
+ if (packet_id < task->previous_packet[generator_id]) {
+ lat_test->mis_ordered++;
+ lat_test->extent += task->previous_packet[generator_id] - packet_id;
+ }
+ task->previous_packet[generator_id] = packet_id;
+}
+
static void lat_test_add_lost(struct lat_test *lat_test, uint64_t lost_packets)
{
lat_test->lost_packets += lost_packets;
static int handle_lat_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
struct task_lat *task = (struct task_lat *)tbase;
+ static int max_flows_printed = 0;
int rc;
- // If link is down, link_speed is 0
- if (unlikely(task->link_speed == 0)) {
- if (task->port && task->port->link_speed != 0) {
- task->link_speed = task->port->link_speed * 125000L;
- plog_info("\tPort %u: link speed is %ld Mbps\n",
- (uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
- } else if (n_pkts) {
- return task->base.tx_pkt(&task->base, mbufs, n_pkts, NULL);
- } else {
- return 0;
- }
- }
-
if (n_pkts == 0) {
task->begin = tbase->aux->tsc_rx.before;
return 0;
// Remember those packets with bad length or bad signature
uint32_t non_dp_count = 0;
- uint64_t pkt_bad_len_sig[(MAX_RX_PKT_ALL + 63) / 64];
-#define BIT64_SET(a64, bit) a64[bit / 64] |= (((uint64_t)1) << (bit & 63))
-#define BIT64_CLR(a64, bit) a64[bit / 64] &= ~(((uint64_t)1) << (bit & 63))
-#define BIT64_TEST(a64, bit) a64[bit / 64] & (((uint64_t)1) << (bit & 63))
+ uint64_t pkt_bad_len_sig = 0;
+#define BIT64_SET(a64, bit) ((a64) |= (((uint64_t)1) << ((bit) & 63)))
+#define BIT64_CLR(a64, bit) ((a64) &= ~(((uint64_t)1) << ((bit) & 63)))
+#define BIT64_TEST(a64, bit) ((a64) & (((uint64_t)1) << ((bit) & 63)))
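+// MAX_PKT_BURST does not exceed 64, so a single 64-bit word is enough to flag every
+// packet of one burst that has a bad length or signature.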
/* Go once through all received packets and read them. If
packet has just been modified by another core, the cost of
const uint64_t rx_tsc = tbase->aux->tsc_rx.after;
uint64_t rx_time_err;
- uint64_t pkt_rx_time64 = tsc_extrapolate_backward(task->link_speed, rx_tsc, task->rx_pkt_meta[0].bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
+ uint64_t pkt_rx_time64 = tsc_extrapolate_backward(task, rx_tsc, task->rx_pkt_meta[0].bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
if (unlikely((task->begin >> LATENCY_ACCURACY) > pkt_rx_time64)) {
			// Extrapolation went back to BEFORE begin => packets were stuck in the NIC while we were not seeing them
rx_time_err = pkt_rx_time64 - (task->last_pkts_tsc >> LATENCY_ACCURACY);
struct rx_pkt_meta_data *rx_pkt_meta = &task->rx_pkt_meta[j];
uint8_t *hdr = rx_pkt_meta->hdr;
- uint32_t pkt_rx_time = tsc_extrapolate_backward(task->link_speed, rx_tsc, rx_pkt_meta->bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
+ uint32_t pkt_rx_time = tsc_extrapolate_backward(task, rx_tsc, rx_pkt_meta->bytes_after_in_bulk, task->last_pkts_tsc) >> LATENCY_ACCURACY;
uint32_t pkt_tx_time = rx_pkt_meta->pkt_tx_time;
uint8_t generator_id;
uint32_t packet_id;
+ int32_t flow_id = -1;
+ if (task->flow_id_pos) {
+ flow_id = *(int32_t *)(hdr + task->flow_id_pos);
+ if (unlikely(flow_id >= (int32_t)(task->flow_count))) {
+ flow_id = -1;
+ if (!max_flows_printed) {
+ plog_info("Too many flows - increase flow count (only printed once)\n");
+ max_flows_printed = 1;
+ }
+ }
+
+ }
+ if (task->packet_id_in_flow_pos && (flow_id != -1)) {
+ uint32_t packet_id_in_flow;
+ struct unique_id *unique_id = (struct unique_id *)(hdr + task->packet_id_in_flow_pos);
+ unique_id_get(unique_id, &generator_id, &packet_id_in_flow);
+ lat_test_check_flow_ordering(task, task->lat_test, flow_id + generator_id * task->flow_count, packet_id_in_flow);
+ }
if (task->unique_id_pos) {
struct unique_id *unique_id = (struct unique_id *)(hdr + task->unique_id_pos);
unique_id_get(unique_id, &generator_id, &packet_id);
// Skip unexpected packet
continue;
}
-
- lat_test_add_lost(task->lat_test, task_lat_early_loss_detect(task, packet_id, generator_id));
+ if (flow_id == -1) {
+ lat_test_check_ordering(task, task->lat_test, packet_id, generator_id);
+ }
+ lat_test_check_duplicate(task, task->lat_test, packet_id, generator_id);
+ uint32_t loss = task_lat_early_loss_detect(task, packet_id, generator_id);
+ if (loss) {
+ lat_test_add_lost(task->lat_test, loss);
+ if (task->loss_id < task->loss_buffer_size) {
+ task->loss_buffer[task->loss_id].packet_id = packet_id;
+ task->loss_buffer[task->loss_id++].n = loss;
+ }
+ }
} else {
generator_id = 0;
packet_id = task->rx_packet_index;
}
/* If accuracy is enabled, latency is reported with a
- delay of ACCURACY_BUFFER_SIZE packets since the generator puts the
- accuracy for packet N into packet N + ACCURACY_BUFFER_SIZE. The delay
+ delay of ACCURACY_WINDOW packets since the generator puts the
+ accuracy for packet N into packet N + ACCURACY_WINDOW. The delay
ensures that all reported latencies have both rx
and tx error. */
if (task->accur_pos) {
uint32_t tx_time_err = *(uint32_t *)(hdr + task->accur_pos);
- struct delayed_latency_entry *delayed_latency_entry = delayed_latency_get(task->delayed_latency_entries, generator_id, packet_id - ACCURACY_BUFFER_SIZE);
+ struct delayed_latency_entry *delayed_latency_entry = delayed_latency_get(task->delayed_latency_entries, generator_id, packet_id - ACCURACY_WINDOW);
if (delayed_latency_entry) {
task_lat_store_lat(task,
task->rx_packet_index++;
}
- task->begin = tbase->aux->tsc_rx.before;
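+ /* Only refresh 'begin' when the burst was not full: a full burst suggests more packets
+    were already waiting, so the previous timestamp remains the extrapolation floor. */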
+ if (n_pkts < MAX_PKT_BURST)
+ task->begin = tbase->aux->tsc_rx.before;
task->last_pkts_tsc = tbase->aux->tsc_rx.after;
rc = task->base.tx_pkt(&task->base, mbufs, n_pkts, NULL);
plog_info("\tNo generators found, hard-coding to %u generators\n", task->generator_count);
} else
task->generator_count = *generator_count;
- plog_info("\tLatency using %u generators\n", task->generator_count);
+ plog_info("\t\tLatency using %u generators\n", task->generator_count);
}
static void task_lat_init_eld(struct task_lat *task, uint8_t socket_id)
{
struct task_lat *task = (struct task_lat *)tbase;
- if (task->port) {
- // task->port->link_speed reports the link speed in Mbps e.g. 40k for a 40 Gbps NIC.
- // task->link_speed reports link speed in Bytes per sec.
- // It can be 0 if link is down, and must hence be updated in fast path.
- task->link_speed = task->port->link_speed * 125000L;
- if (task->link_speed)
- plog_info("\tPort %u: link speed is %ld Mbps\n",
- (uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
- else
- plog_info("\tPort %u: link speed is %ld Mbps - link might be down\n",
- (uint8_t)(task->port - prox_port_cfg), 8 * task->link_speed / 1000000);
- }
}
static void init_task_lat(struct task_base *tbase, struct task_args *targ)
task->accur_pos = targ->accur_pos;
task->sig_pos = targ->sig_pos;
task->sig = targ->sig;
+ task->packet_id_in_flow_pos = targ->packet_id_in_flow_pos;
+ task->flow_id_pos = targ->flow_id_pos;
task->unique_id_pos = targ->packet_id_pos;
task->latency_buffer_size = targ->latency_buffer_size;
PROX_PANIC(task->delayed_latency_entries[i] == NULL, "Failed to allocate array for storing delayed latency entries\n");
}
if (task->unique_id_pos == 0) {
- /* When using accuracy feature, the accuracy from TX is written ACCURACY_BUFFER_SIZE packets later
+ /* When using accuracy feature, the accuracy from TX is written ACCURACY_WINDOW packets later
* We can only retrieve the good packet if a packet id is written to it.
- * Otherwise we will use the packet RECEIVED ACCURACY_BUFFER_SIZE packets ago which is OK if
+ * Otherwise we will use the packet RECEIVED ACCURACY_WINDOW packets ago which is OK if
* packets are not re-ordered. If packets are re-ordered, then the matching between
- * the tx accuracy znd the latency is wrong.
+ * the TX accuracy and the latency is wrong.
*/
plog_warn("\tWhen accuracy feature is used, a unique id should ideally also be used\n");
}
}
- task->lt[0].bucket_size = targ->bucket_size - LATENCY_ACCURACY;
- task->lt[1].bucket_size = targ->bucket_size - LATENCY_ACCURACY;
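+ // -1 wraps to the maximum unsigned value, so the first measured latency becomes the minimum.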
+ task->lt[0].min_lat = -1;
+ task->lt[1].min_lat = -1;
+ task->lt[0].bucket_size = targ->bucket_size;
+ task->lt[1].bucket_size = targ->bucket_size;
if (task->unique_id_pos) {
task_lat_init_eld(task, socket_id);
task_lat_reset_eld(task);
+ task->previous_packet = prox_zmalloc(sizeof(task->previous_packet[0]) * task->generator_count, socket_id);
+ PROX_PANIC(task->previous_packet == NULL, "Failed to allocate array for storing previous packet\n");
}
task->lat_test = &task->lt[task->using_lt];
task_lat_set_accuracy_limit(task, targ->accuracy_limit_nsec);
- task->rx_pkt_meta = prox_zmalloc(MAX_RX_PKT_ALL * sizeof(*task->rx_pkt_meta), socket_id);
+ task->rx_pkt_meta = prox_zmalloc(MAX_PKT_BURST * sizeof(*task->rx_pkt_meta), socket_id);
PROX_PANIC(task->rx_pkt_meta == NULL, "unable to allocate memory to store RX packet meta data");
- task->link_speed = UINT64_MAX;
+ uint32_t max_frame_size = MAX_PKT_SIZE;
+ uint64_t bytes_per_hz = UINT64_MAX;
if (targ->nb_rxports) {
- // task->port structure is only used while starting handle_lat to get the link_speed.
- // link_speed can not be quiried at init as the port has not been initialized yet.
struct prox_port_cfg *port = &prox_port_cfg[targ->rx_port_queue[0].port];
- task->port = port;
+ max_frame_size = port->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN + 2 * PROX_VLAN_TAG_SIZE;
+
+ // port->max_link_speed reports the maximum, non-negotiated link speed in Mbps, e.g. 40k for a 40 Gbps NIC.
+ // It can be UINT32_MAX (virtual devices or not supported by DPDK < 16.04)
+ if (port->max_link_speed != UINT32_MAX) {
+ bytes_per_hz = port->max_link_speed * 125000L;
+ plog_info("\t\tPort %u: max link speed is %ld Mbps\n",
+ (uint8_t)(port - prox_port_cfg), 8 * bytes_per_hz / 1000000);
+ }
+ }
+ task->loss_buffer_size = targ->loss_buffer_size;
+ if (task->loss_buffer_size) {
+ char name[256];
+ sprintf(name, "loss_%u.txt", targ->lconf->id);
+ task->fp_loss = fopen(name, "w+");
+ PROX_PANIC(task->fp_loss == NULL, "Failed to open %s\n", name);
+
+ task->loss_buffer = prox_zmalloc(task->loss_buffer_size * sizeof(struct loss_buffer), rte_lcore_to_socket_id(targ->lconf->id));
+ PROX_PANIC(task->loss_buffer == NULL,
+ "Failed to allocate %lu bytes (in huge pages) for loss_buffer\n", task->loss_buffer_size * sizeof(struct loss_buffer));
+ }
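+ /* Precompute, for every cumulative byte count that can sit behind a packet in a burst,
+    how long those bytes take on the wire in TSC ticks; tsc_extrapolate_backward() uses
+    this table to date each packet of a burst individually. */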
+ task->bytes_to_tsc = prox_zmalloc(max_frame_size * sizeof(task->bytes_to_tsc[0]) * MAX_PKT_BURST, rte_lcore_to_socket_id(targ->lconf->id));
+ PROX_PANIC(task->bytes_to_tsc == NULL,
+ "Failed to allocate %lu bytes (in huge pages) for bytes_to_tsc\n", max_frame_size * sizeof(task->bytes_to_tsc[0]) * MAX_PKT_BURST);
+
+ // The hz estimate can be slightly over-estimated in some cases,
+ // which would result in too much extrapolation.
+ // Only account for 99% of the extrapolation to tolerate clocks with up to 1% error.
+ for (unsigned int i = 0; i < max_frame_size * MAX_PKT_BURST; i++) {
+ if (bytes_per_hz == UINT64_MAX)
+ task->bytes_to_tsc[i] = 0;
+ else
+ task->bytes_to_tsc[i] = (rte_get_tsc_hz() * i * 0.99) / bytes_per_hz;
+ }
+ task->flow_count = targ->flow_count;
+ PROX_PANIC(task->flow_id_pos && (task->flow_count == 0), "flow_count must be configured when flow_id_pos is set\n");
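+ // One entry per (generator, flow) pair, remembering the last packet id seen on that flow.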
+ if (task->flow_count) {
+ task->flows = prox_zmalloc(task->flow_count * sizeof(struct flows) * task->generator_count, rte_lcore_to_socket_id(targ->lconf->id));
+ PROX_PANIC(task->flows == NULL,
+ "Failed to allocate %lu bytes (in huge pages) for flows\n", task->flow_count * sizeof(struct flows) * task->generator_count);
}
}
.handle = handle_lat_bulk,
.start = lat_start,
.stop = lat_stop,
- .flag_features = TASK_FEATURE_TSC_RX | TASK_FEATURE_RX_ALL | TASK_FEATURE_ZERO_RX | TASK_FEATURE_NEVER_DISCARDS,
+ .flag_features = TASK_FEATURE_TSC_RX | TASK_FEATURE_ZERO_RX | TASK_FEATURE_NEVER_DISCARDS,
.size = sizeof(struct task_lat)
};