// Copyright (c) 2010-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_version.h>

#include "rx_pkt.h"
#include "task_base.h"
#include "stats.h"
#include "log.h"
#include "mbuf_utils.h"
#include "prefetch.h"
#include "arp.h"
#include "tx_pkt.h"
#include "handle_master.h"
#include "input.h" /* Needed for callback on dump */

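/* TCP port 179 is the well-known BGP port; the value is kept in
   network byte order so it can be compared directly against the
   ports found in received TCP headers. */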
#define TCP_PORT_BGP	rte_cpu_to_be_16(179)

/* The _param versions of the rx_pkt_hw functions are used to create
   two instances of very similar variations of these functions. The
   variations are specified by the "multi" parameter, which signifies
   that the rte_eth_rx_burst function should be called multiple times.
   The reason for this is that with the vector PMD, the maximum number
   of packets returned per call is 32. If packets have been split into
   multiple mbufs, rte_eth_rx_burst might even return fewer than 32
   packets.
   Some algorithms (like QoS) only work correctly if more than 32
   packets are received when the dequeue step involves finding 32
   packets. */

static uint16_t rx_pkt_hw_port_queue(struct port_queue *pq, struct rte_mbuf **mbufs, int multi)
{
	uint16_t nb_rx, n;

	nb_rx = rte_eth_rx_burst(pq->port, pq->queue, mbufs, MAX_PKT_BURST);

	if (multi) {
		n = nb_rx;
		while (n != 0 && MAX_PKT_BURST - nb_rx >= MIN_PMD_RX) {
			n = rte_eth_rx_burst(pq->port, pq->queue, mbufs + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > 64, "Received %d packets while expecting maximum %d\n", nb_rx, MAX_PKT_BURST);
		}
	}
	return nb_rx;
}

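/* Advance to the next receive port in round-robin order, wrapping
   back to port 0 after the last configured RX port. */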
static void next_port(struct rx_params_hw *rx_params_hw)
{
	++rx_params_hw->last_read_portid;
	if (unlikely(rx_params_hw->last_read_portid == rx_params_hw->nb_rxports)) {
		rx_params_hw->last_read_portid = 0;
	}
}

static void next_port_pow2(struct rx_params_hw *rx_params_hw)
{
	rx_params_hw->last_read_portid = (rx_params_hw->last_read_portid + 1) & rx_params_hw->rxport_mask;
}

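/* If packet dumping or tracing is active for this task, log the
   received control-plane packet either to the PROX log or back
   through the input callback that requested the dump. */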
static inline void dump_l3(struct task_base *tbase, struct rte_mbuf *mbuf)
{
	if (unlikely(tbase->aux->task_rt_dump.n_print_rx)) {
		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			plogdx_info(mbuf, "RX: ");
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;
			char tmp[128];
			int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
			int port_id = mbuf->port;
#else
			int port_id = mbuf->pkt.in_port;
#endif
			strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
					  rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, tmp, strlen);
			input->reply(input, rte_pktmbuf_mtod(mbuf, char *), rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, "\n", 1);
		}
		tbase->aux->task_rt_dump.n_print_rx--;
		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	if (unlikely(tbase->aux->task_rt_dump.n_trace)) {
		plogdx_info(mbuf, "RX: ");
		tbase->aux->task_rt_dump.n_trace--;
	}
}

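/* Divert ICMP and BGP packets to the control plane ring so that the
   master task can handle them; "skip" counts how many packets have
   been removed from the burst so that the remaining mbufs can be
   compacted in place. */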
static inline void handle_ipv4(struct task_base *tbase, struct rte_mbuf **mbufs, int i, prox_rte_ipv4_hdr *pip, int *skip)
{
	prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1);
	if (pip->next_proto_id == IPPROTO_ICMP) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_CTRL, mbufs[i]);
		(*skip)++;
	} else if ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP)) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_CTRL, mbufs[i]);
		(*skip)++;
	} else if (unlikely(*skip)) {
		mbufs[i - *skip] = mbufs[i];
	}
}

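/* Walk the received burst, extract ARP, ICMP and BGP packets towards
   the control plane ring and shift the remaining data-plane packets
   to the front of the mbuf array. Returns the number of packets that
   were taken out of the burst. */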
static inline int handle_l3(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr)
{
	struct rte_mbuf **mbufs = *mbufs_ptr;
	int i;
	struct ether_hdr_arp *hdr_arp[MAX_PKT_BURST];
	prox_rte_ether_hdr *hdr;
	prox_rte_ipv4_hdr *pip;
	prox_rte_vlan_hdr *vlan;
	int skip = 0;

	for (i = 0; i < nb_rx; i++) {
		PREFETCH0(mbufs[i]);
	}

	for (i = 0; i < nb_rx; i++) {
		hdr_arp[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *);
		PREFETCH0(hdr_arp[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) {
			hdr = (prox_rte_ether_hdr *)hdr_arp[i];
			pip = (prox_rte_ipv4_hdr *)(hdr + 1);
			handle_ipv4(tbase, mbufs, i, pip, &skip);
		} else {
			switch (hdr_arp[i]->ether_hdr.ether_type) {
			case ETYPE_VLAN:
				hdr = (prox_rte_ether_hdr *)hdr_arp[i];
				vlan = (prox_rte_vlan_hdr *)(hdr + 1);
				if (vlan->eth_proto == ETYPE_IPv4) {
					pip = (prox_rte_ipv4_hdr *)(vlan + 1);
					handle_ipv4(tbase, mbufs, i, pip, &skip);
				} else if (vlan->eth_proto == ETYPE_ARP) {
					dump_l3(tbase, mbufs[i]);
					tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_TO_CTRL, mbufs[i]);
					skip++;
				}
				break;
			case ETYPE_ARP:
				dump_l3(tbase, mbufs[i]);
				tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_TO_CTRL, mbufs[i]);
				skip++;
				break;
			default:
				if (unlikely(skip)) {
					mbufs[i - skip] = mbufs[i];
				}
				break;
			}
		}
	}
	return skip;
}

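/* Common receive path for hardware ports: read a burst from the port
   and queue selected by last_read_portid, advance to the next port
   through the "next" callback, optionally run the L3 classification
   and update the task statistics. */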
static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi,
				void (*next)(struct rx_params_hw *rx_param_hw), int l3)
{
	uint8_t last_read_portid;
	uint16_t nb_rx;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	last_read_portid = tbase->rx_params_hw.last_read_portid;
	struct port_queue *pq = &tbase->rx_params_hw.rx_pq[last_read_portid];

	nb_rx = rx_pkt_hw_port_queue(pq, *mbufs_ptr, multi);
	next(&tbase->rx_params_hw);

	if (l3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	if (likely(nb_rx > 0)) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx - skip;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}

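/* Specialized variant of rx_pkt_hw_param for tasks that receive from
   a single port/queue pair: the round-robin port selection and the
   indirect call through "next" are not needed. */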
static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi, int l3)
{
	uint16_t nb_rx, n;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	nb_rx = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
				 tbase->rx_params_hw1.rx_pq.queue,
				 *mbufs_ptr, MAX_PKT_BURST);

	if (multi) {
		n = nb_rx;
		while ((n != 0) && (MAX_PKT_BURST - nb_rx >= MIN_PMD_RX)) {
			n = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
					     tbase->rx_params_hw1.rx_pq.queue,
					     *mbufs_ptr + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > 64, "Received %d packets while expecting maximum %d\n", nb_rx, MAX_PKT_BURST);
		}
	}

	if (l3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	if (likely(nb_rx > 0)) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx - skip;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}

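/* Public rx_pkt_hw* entry points. The suffixes select the variants:
   "pow2" advances the port with a mask instead of a compare, "hw1"
   reads from a single port/queue, "multi" keeps calling
   rte_eth_rx_burst to fill the burst, and "l3" enables the L3
   classification of control-plane packets. */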
uint16_t rx_pkt_hw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, 0);
}

uint16_t rx_pkt_hw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, 0);
}

uint16_t rx_pkt_hw_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, 0);
}

uint16_t rx_pkt_hw_pow2_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, 0);
}

uint16_t rx_pkt_hw_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, 1);
}

uint16_t rx_pkt_hw_pow2_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, 1);
}

uint16_t rx_pkt_hw1_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, 1);
}

uint16_t rx_pkt_hw_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, 1);
}

uint16_t rx_pkt_hw_pow2_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, 1);
}

uint16_t rx_pkt_hw1_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, 1);
}

/* The following functions implement ring access */
uint16_t ring_deq(struct rte_ring *r, struct rte_mbuf **mbufs)
{
	void **v_mbufs = (void **)mbufs;
#ifdef BRAS_RX_BULK
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST) < 0? 0 : MAX_RING_BURST;
#else
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#else
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST);
#else
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#endif
}

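/* Receive from software rings: dequeue the rings in round-robin
   order, starting after the ring that was read last, and stop as soon
   as one ring returns packets or all rings have been tried once. */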
uint16_t rx_pkt_sw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = lr + 1 == tbase->rx_params_sw.nb_rxrings? 0 : lr + 1;
	} while(!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}

/* Same as rx_pkt_sw except with a mask for the number of receive
   rings (can only be used if nb_rxring is a power of 2). */
uint16_t rx_pkt_sw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = (lr + 1) & tbase->rx_params_sw.rxrings_mask;
	} while(!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}

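/* Receive packets that the task has queued to itself: the mbufs are
   already stored in the task's own ws_mbuf buffer, so they are simply
   handed back without touching any ring or port. */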
uint16_t rx_pkt_self(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	uint16_t nb_rx = tbase->ws_mbuf->idx[0].nb_rx;
	if (nb_rx) {
		tbase->ws_mbuf->idx[0].nb_rx = 0;
		*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}

/* Used for tasks that do not receive packets (e.g. packet
   generation). Always returns 1 but never returns packets and does
   not increment statistics. This allows the same code path to be used
   as for tasks that actually receive packets. */
uint16_t rx_pkt_dummy(__attribute__((unused)) struct task_base *tbase,
		      __attribute__((unused)) struct rte_mbuf ***mbufs)
{
	return 1;
}

/* After the system has been configured, it is known if there is only
   one RX ring. If this is the case, a more specialized version of
   rx_pkt_sw can be used to save cycles. */
uint16_t rx_pkt_sw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint16_t nb_rx = ring_deq(tbase->rx_params_sw1.rx_ring, *mbufs);

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}

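/* Invoke the receive function that was active before the current
   wrapper (dump, trace, distribution, bandwidth or TSC accounting)
   was installed on top of it. */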
static uint16_t call_prev_rx_pkt(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret;

	tbase->aux->rx_prev_idx++;
	ret = tbase->aux->rx_pkt_prev[tbase->aux->rx_prev_idx - 1](tbase, mbufs);
	tbase->aux->rx_prev_idx--;

	return ret;
}

/* Only used when there are packets to be dumped. This function is
   meant as a debugging tool and is therefore not optimized. When the
   number of packets to dump falls back to 0, the original (optimized)
   rx function is restored. This allows dumping packets to be
   supported without any performance impact when the feature is not
   used. */
uint16_t rx_pkt_dump(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (ret) {
		uint32_t n_dump = tbase->aux->task_rt_dump.n_print_rx;
		n_dump = ret < n_dump? ret : n_dump;

		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			for (uint32_t i = 0; i < n_dump; ++i) {
				plogdx_info((*mbufs)[i], "RX: ");
			}
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;

			for (uint32_t i = 0; i < n_dump; ++i) {
				/* TODO: Execute callback with full
				   data in a single call. */
				char tmp[128];
				int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
				int port_id = ((*mbufs)[i])->port;
#else
				int port_id = ((*mbufs)[i])->pkt.in_port;
#endif
				strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
						  rte_pktmbuf_pkt_len((*mbufs)[i]));

				input->reply(input, tmp, strlen);
				input->reply(input, rte_pktmbuf_mtod((*mbufs)[i], char *), rte_pktmbuf_pkt_len((*mbufs)[i]));
				input->reply(input, "\n", 1);
			}
		}

		tbase->aux->task_rt_dump.n_print_rx -= n_dump;

		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	return ret;
}

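/* Like rx_pkt_dump, but instead of printing immediately, copy the
   start of each received packet (up to MAX_RING_BURST packets per
   call) into the per-task trace buffers so they can be reported
   later; the trace is finalized on the TX side. */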
uint16_t rx_pkt_trace(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	tbase->aux->task_rt_dump.cur_trace = 0;
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (tbase->aux->task_rt_dump.n_trace) {
		uint32_t n_trace = tbase->aux->task_rt_dump.n_trace;
		n_trace = ret < n_trace? ret : n_trace;
		n_trace = n_trace <= MAX_RING_BURST ? n_trace : MAX_RING_BURST;

		for (uint32_t i = 0; i < n_trace; ++i) {
			uint8_t *pkt = rte_pktmbuf_mtod((*mbufs)[i], uint8_t *);
			rte_memcpy(tbase->aux->task_rt_dump.pkt_cpy[i], pkt, sizeof(tbase->aux->task_rt_dump.pkt_cpy[i]));
			tbase->aux->task_rt_dump.pkt_cpy_len[i] = rte_pktmbuf_pkt_len((*mbufs)[i]);
			tbase->aux->task_rt_dump.pkt_mbuf_addr[i] = (*mbufs)[i];
		}
		tbase->aux->task_rt_dump.cur_trace += n_trace;

		tbase->aux->task_rt_dump.n_trace -= n_trace;
		/* Unset by TX when n_trace = 0 */
	}
	return ret;
}

/* Gather the distribution of the number of packets that have been
   received from one RX call. Since the value is only modified by the
   task that receives the packet, no atomic operation is needed. */
uint16_t rx_pkt_distr(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (likely(ret < RX_BUCKET_SIZE))
		tbase->aux->rx_bucket[ret]++;
	else
		tbase->aux->rx_bucket[RX_BUCKET_SIZE - 1]++;
	return ret;
}

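/* Account the bytes received in this burst (wire size of each packet)
   into the task statistics. */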
uint16_t rx_pkt_bw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint32_t tot_bytes = 0;

	for (uint16_t i = 0; i < ret; ++i) {
		tot_bytes += mbuf_wire_size((*mbufs)[i]);
	}

	TASK_STATS_ADD_RX_BYTES(&tbase->aux->stats, tot_bytes);

	return ret;
}

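/* Record the TSC just before and just after the underlying RX call so
   that the cycles spent receiving packets can be reported. */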
uint16_t rx_pkt_tsc(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint64_t before = rte_rdtsc();
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint64_t after = rte_rdtsc();

	tbase->aux->tsc_rx.before = before;
	tbase->aux->tsc_rx.after = after;

	return ret;
}