// Copyright (c) 2010-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_version.h>

#include "rx_pkt.h"
#include "task_base.h"
#include "clock.h"
#include "stats.h"
#include "log.h"
#include "mbuf_utils.h"
#include "input.h" /* Needed for callback on dump */
#include "handle_master.h"
#include "defaults.h"
#include "prox_ipv6.h" /* Needed for callback on dump */
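/* BGP runs over TCP port 179. The constant is kept in network byte
   order so it can be compared directly against the on-wire TCP port
   fields without a byte swap per packet. */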
#define TCP_PORT_BGP	rte_cpu_to_be_16(179)
/* The _param versions of the rx_pkt_hw functions are used to create
   two instances of very similar variations of these functions. The
   variations are specified by the "multi" parameter which signifies
   that the rte_eth_rx_burst function should be called multiple times.
   The reason for this is that with the vector PMD, the maximum number
   of packets being returned is 32. If packets have been split in
   multiple mbufs then rte_eth_rx_burst might even receive fewer than
   32 packets.
   Some algorithms (like QoS) only work correctly if more than 32
   packets are received if the dequeue step involves finding 32 packets. */
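/* For example, assuming MAX_PKT_BURST is 64 and MIN_PMD_RX is 32: a
   first burst may return only 32 packets (the vector PMD limit),
   leaving room for another MIN_PMD_RX-sized burst; the loop below
   then calls rte_eth_rx_burst again and can hand up to 64 packets to
   the task in a single RX call. */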
static uint16_t rx_pkt_hw_port_queue(struct port_queue *pq, struct rte_mbuf **mbufs, int multi)
{
	uint16_t nb_rx, n;

	nb_rx = rte_eth_rx_burst(pq->port, pq->queue, mbufs, MAX_PKT_BURST);

	if (multi) {
		n = nb_rx;
		while (n != 0 && MAX_PKT_BURST - nb_rx >= MIN_PMD_RX) {
			n = rte_eth_rx_burst(pq->port, pq->queue, mbufs + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > MAX_PKT_BURST, "Received %d packets while expecting maximum %d\n", nb_rx, MAX_PKT_BURST);
		}
	}
	return nb_rx;
}
static void next_port(struct rx_params_hw *rx_params_hw)
{
	++rx_params_hw->last_read_portid;
	if (unlikely(rx_params_hw->last_read_portid == rx_params_hw->nb_rxports)) {
		rx_params_hw->last_read_portid = 0;
	}
}
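/* Same round-robin advance as next_port, but implemented with a bit
   mask; this only works when the number of RX ports is a power of
   two. */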
static void next_port_pow2(struct rx_params_hw *rx_params_hw)
{
	rx_params_hw->last_read_portid = (rx_params_hw->last_read_portid + 1) & rx_params_hw->rxport_mask;
}
static inline void dump_l3(struct task_base *tbase, struct rte_mbuf *mbuf)
{
	if (unlikely(tbase->aux->task_rt_dump.n_print_rx)) {
		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			plogdx_info(mbuf, "RX: ");
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;
			char tmp[128];
			int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
			int port_id = mbuf->port;
#else
			int port_id = mbuf->pkt.in_port;
#endif
			strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
				rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, tmp, strlen);
			input->reply(input, rte_pktmbuf_mtod(mbuf, char *), rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, "\n", 1);
		}
		tbase->aux->task_rt_dump.n_print_rx--;
		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	if (unlikely(tbase->aux->task_rt_dump.n_trace)) {
		plogdx_info(mbuf, "RX: ");
		tbase->aux->task_rt_dump.n_trace--;
	}
}
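/* Divert an IPv4 control-plane packet (ICMP or BGP) to the master
   task and compact the remaining dataplane packets to the front of
   the burst. *skip counts how many mbufs have been taken out of the
   burst so far. */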
static inline void handle_ipv4(struct task_base *tbase, struct rte_mbuf **mbufs, int i, prox_rte_ipv4_hdr *pip, int *skip)
{
	prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1);
	if (pip->next_proto_id == IPPROTO_ICMP) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_MASTER, mbufs[i]);
		(*skip)++;
	} else if ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP)) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_MASTER, mbufs[i]);
		(*skip)++;
	} else if (unlikely(*skip)) {
		mbufs[i - *skip] = mbufs[i];
	}
}
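/* Scan a received burst in L3 mode: ARP packets (VLAN-tagged or not)
   are forwarded to the master task, IPv4 packets are checked for ICMP
   and BGP, and everything else is kept for the dataplane. Returns the
   number of packets removed from the burst. */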
static inline int handle_l3(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr)
{
	struct rte_mbuf **mbufs = *mbufs_ptr;
	int i;
	struct ether_hdr_arp *hdr_arp[MAX_PKT_BURST];
	prox_rte_ether_hdr *hdr;
	prox_rte_ipv4_hdr *pip;
	prox_rte_vlan_hdr *vlan;
	int skip = 0;

	for (i = 0; i < nb_rx; i++) {
		PREFETCH0(mbufs[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		hdr_arp[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *);
		PREFETCH0(hdr_arp[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) {
			hdr = (prox_rte_ether_hdr *)hdr_arp[i];
			pip = (prox_rte_ipv4_hdr *)(hdr + 1);
			handle_ipv4(tbase, mbufs, i, pip, &skip);
		} else {
			switch (hdr_arp[i]->ether_hdr.ether_type) {
			case ETYPE_VLAN:
				hdr = (prox_rte_ether_hdr *)hdr_arp[i];
				vlan = (prox_rte_vlan_hdr *)(hdr + 1);
				if (vlan->eth_proto == ETYPE_IPv4) {
					pip = (prox_rte_ipv4_hdr *)(vlan + 1);
					handle_ipv4(tbase, mbufs, i, pip, &skip);
				} else if (vlan->eth_proto == ETYPE_ARP) {
					dump_l3(tbase, mbufs[i]);
					tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
					skip++;
				}
				break;
			case ETYPE_ARP:
				dump_l3(tbase, mbufs[i]);
				tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
				skip++;
				break;
			default:
				if (unlikely(skip)) {
					mbufs[i - skip] = mbufs[i];
				}
				break;
			}
		}
	}
	return skip;
}
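/* NDP counterpart of handle_l3: ICMPv6 packets (which carry the NDP
   messages) are diverted to the master task; all other packets stay
   in the dataplane burst. Returns the number of packets removed. */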
static inline int handle_ndp(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr)
{
	struct rte_mbuf **mbufs = *mbufs_ptr;
	int i;
	prox_rte_ether_hdr *hdr[MAX_PKT_BURST];
	int skip = 0;

	for (i = 0; i < nb_rx; i++) {
		PREFETCH0(mbufs[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		hdr[i] = rte_pktmbuf_mtod(mbufs[i], prox_rte_ether_hdr *);
		PREFETCH0(hdr[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr[i] + 1);
		if (unlikely((hdr[i]->ether_type == ETYPE_IPv6) && (ipv6_hdr->proto == ICMPv6))) {
			dump_l3(tbase, mbufs[i]);
			tx_ring(tbase, tbase->l3.ctrl_plane_ring, NDP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
			skip++;
		} else if (unlikely(skip)) {
			mbufs[i - skip] = mbufs[i];
		}
	}
	return skip;
}
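/* Generic hardware RX: receive a burst from the port queue selected
   by the round-robin callback "next", optionally filter out
   control-plane packets (l3_ndp selects PROX_L3 or PROX_NDP handling)
   and account the result in the task statistics. Returns the number
   of dataplane packets handed to the task. */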
static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi,
				void (*next)(struct rx_params_hw *rx_param_hw), int l3_ndp)
{
	uint8_t last_read_portid;
	uint16_t nb_rx;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	last_read_portid = tbase->rx_params_hw.last_read_portid;
	struct port_queue *pq = &tbase->rx_params_hw.rx_pq[last_read_portid];

	nb_rx = rx_pkt_hw_port_queue(pq, *mbufs_ptr, multi);
	next(&tbase->rx_params_hw);

	if (l3_ndp == PROX_L3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);
	else if (l3_ndp == PROX_NDP)
		skip = handle_ndp(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	if (likely(nb_rx > 0)) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx - skip;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi, int l3_ndp)
{
	uint16_t nb_rx, n;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	nb_rx = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
				 tbase->rx_params_hw1.rx_pq.queue,
				 *mbufs_ptr, MAX_PKT_BURST);

	if (multi) {
		n = nb_rx;
		while ((n != 0) && (MAX_PKT_BURST - nb_rx >= MIN_PMD_RX)) {
			n = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
					     tbase->rx_params_hw1.rx_pq.queue,
					     *mbufs_ptr + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > MAX_PKT_BURST, "Received %d packets while expecting maximum %d\n", nb_rx, MAX_PKT_BURST);
		}
	}

	if (l3_ndp == PROX_L3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);
	else if (l3_ndp == PROX_NDP)
		skip = handle_ndp(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	if (likely(nb_rx > 0)) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx - skip;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
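/* The exported wrappers below instantiate the generic _param helpers:
   "hw" reads from multiple port queues in round-robin order, "hw1"
   from a single queue, "pow2" uses the mask-based round robin,
   "multi" enables the repeated-burst loop, and the _l3/_ndp variants
   enable control-plane filtering. */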
uint16_t rx_pkt_hw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, 0);
}

uint16_t rx_pkt_hw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, 0);
}

uint16_t rx_pkt_hw_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, 0);
}

uint16_t rx_pkt_hw_pow2_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, 0);
}

uint16_t rx_pkt_hw_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_L3);
}

uint16_t rx_pkt_hw_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_NDP);
}

uint16_t rx_pkt_hw_pow2_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_L3);
}

uint16_t rx_pkt_hw_pow2_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_NDP);
}

uint16_t rx_pkt_hw1_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_L3);
}

uint16_t rx_pkt_hw1_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_NDP);
}

uint16_t rx_pkt_hw_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_L3);
}

uint16_t rx_pkt_hw_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_NDP);
}

uint16_t rx_pkt_hw_pow2_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_L3);
}

uint16_t rx_pkt_hw_pow2_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_NDP);
}

uint16_t rx_pkt_hw1_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_L3);
}

uint16_t rx_pkt_hw1_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_NDP);
}
/* The following functions implement ring access */
uint16_t ring_deq(struct rte_ring *r, struct rte_mbuf **mbufs)
{
	void **v_mbufs = (void **)mbufs;
#ifdef BRAS_RX_BULK
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST) < 0? 0 : MAX_RING_BURST;
#else
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#else
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST);
#else
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#endif
}
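/* Note the bulk/burst distinction: the bulk variant dequeues either
   exactly MAX_RING_BURST packets or nothing, while the burst variant
   returns up to MAX_RING_BURST packets, i.e. whatever is available. */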
uint16_t rx_pkt_sw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = lr + 1 == tbase->rx_params_sw.nb_rxrings? 0 : lr + 1;
	} while (!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}
/* Same as rx_pkt_sw except with a mask for the number of receive
   rings (can only be used if nb_rxring is a power of 2). */
uint16_t rx_pkt_sw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = (lr + 1) & tbase->rx_params_sw.rxrings_mask;
	} while (!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}
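/* Receive packets that the task sent to itself: the pending mbufs are
   already in the task's ws_mbuf structure, so the "burst" is just a
   matter of clearing the pending count and returning the buffers. */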
uint16_t rx_pkt_self(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	uint16_t nb_rx = tbase->ws_mbuf->idx[0].nb_rx;

	if (nb_rx) {
		tbase->ws_mbuf->idx[0].nb_rx = 0;
		*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}
/* Used for tasks that do not receive packets (e.g. packet
   generation). Always returns 1 but never returns packets and does
   not increment statistics. This allows the same code path to be used
   as for tasks that actually receive packets. */
uint16_t rx_pkt_dummy(__attribute__((unused)) struct task_base *tbase,
		      __attribute__((unused)) struct rte_mbuf ***mbufs)
{
	return 1;
}
/* After the system has been configured, it is known if there is only
   one RX ring. If this is the case, a more specialized version of the
   function above can be used to save cycles. */
uint16_t rx_pkt_sw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint16_t nb_rx = ring_deq(tbase->rx_params_sw1.rx_ring, *mbufs);

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	} else {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}
}
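/* The dump/trace/distribution/bandwidth/tsc variants below are
   stacked on top of the regular RX function: call_prev_rx_pkt invokes
   the next function down the rx_pkt_prev stack, so each wrapper can
   post-process the packets that the underlying RX function
   received. */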
static uint16_t call_prev_rx_pkt(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret;

	tbase->aux->rx_prev_idx++;
	ret = tbase->aux->rx_pkt_prev[tbase->aux->rx_prev_idx - 1](tbase, mbufs);
	tbase->aux->rx_prev_idx--;

	return ret;
}
/* Only used when there are packets to be dumped. This function is
   meant as a debugging tool and is therefore not optimized. When the
   number of packets to dump falls back to 0, the original (optimized)
   RX function is restored. This makes it possible to support dumping
   packets without any performance impact when the feature is not used. */
uint16_t rx_pkt_dump(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (ret) {
		uint32_t n_dump = tbase->aux->task_rt_dump.n_print_rx;
		n_dump = ret < n_dump? ret : n_dump;

		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			for (uint32_t i = 0; i < n_dump; ++i) {
				plogdx_info((*mbufs)[i], "RX: ");
			}
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;

			for (uint32_t i = 0; i < n_dump; ++i) {
				/* TODO: Execute callback with full
				   data in a single call. */
				char tmp[128];
				int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
				int port_id = ((*mbufs)[i])->port;
#else
				int port_id = ((*mbufs)[i])->pkt.in_port;
#endif
				strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
					rte_pktmbuf_pkt_len((*mbufs)[i]));

				input->reply(input, tmp, strlen);
				input->reply(input, rte_pktmbuf_mtod((*mbufs)[i], char *), rte_pktmbuf_pkt_len((*mbufs)[i]));
				input->reply(input, "\n", 1);
			}
		}

		tbase->aux->task_rt_dump.n_print_rx -= n_dump;

		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	return ret;
}
uint16_t rx_pkt_trace(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	tbase->aux->task_rt_dump.cur_trace = 0;
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (ret) {
		uint32_t n_trace = tbase->aux->task_rt_dump.n_trace;
		n_trace = ret < n_trace? ret : n_trace;
		n_trace = n_trace <= MAX_RING_BURST ? n_trace : MAX_RING_BURST;

		for (uint32_t i = 0; i < n_trace; ++i) {
			uint8_t *pkt = rte_pktmbuf_mtod((*mbufs)[i], uint8_t *);
			rte_memcpy(tbase->aux->task_rt_dump.pkt_cpy[i], pkt, sizeof(tbase->aux->task_rt_dump.pkt_cpy[i]));
			tbase->aux->task_rt_dump.pkt_cpy_len[i] = rte_pktmbuf_pkt_len((*mbufs)[i]);
			tbase->aux->task_rt_dump.pkt_mbuf_addr[i] = (*mbufs)[i];
		}
		tbase->aux->task_rt_dump.cur_trace += n_trace;

		tbase->aux->task_rt_dump.n_trace -= n_trace;
		/* Unset by TX when n_trace = 0 */
	}
	return ret;
}
/* Gather the distribution of the number of packets that have been
   received from one RX call. Since the value is only modified by the
   task that receives the packet, no atomic operation is needed. */
uint16_t rx_pkt_distr(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (likely(ret < RX_BUCKET_SIZE))
		tbase->aux->rx_bucket[ret]++;
	else
		tbase->aux->rx_bucket[RX_BUCKET_SIZE - 1]++;
	return ret;
}
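/* Account the number of bytes received by the previous RX function.
   mbuf_wire_size() is assumed to include per-packet L1 framing
   overhead, so the statistic reflects wire bandwidth rather than
   payload bytes. */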
uint16_t rx_pkt_bw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint32_t tot_bytes = 0;

	for (uint16_t i = 0; i < ret; ++i) {
		tot_bytes += mbuf_wire_size((*mbufs)[i]);
	}

	TASK_STATS_ADD_RX_BYTES(&tbase->aux->stats, tot_bytes);

	return ret;
}
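/* Sample the TSC before and after the previous RX function so the
   cycles spent receiving packets can be reported separately from the
   cycles spent processing them. */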
uint16_t rx_pkt_tsc(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint64_t before = rte_rdtsc();
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint64_t after = rte_rdtsc();

	tbase->aux->tsc_rx.before = before;
	tbase->aux->tsc_rx.after = after;

	return ret;
}