// Copyright (c) 2010-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_version.h>

#include "rx_pkt.h"
#include "task_base.h"
#include "clock.h"
#include "stats.h"
#include "log.h"
#include "mbuf_utils.h"
#include "input.h" /* Needed for callback on dump */
#include "arp.h"
#include "handle_master.h"
#include "prox_ipv6.h"
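/* BGP runs over TCP port 179; matching packets are diverted to the
   master task below rather than handled in the data plane. */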
#define TCP_PORT_BGP	rte_cpu_to_be_16(179)
/* The _param version of the rx_pkt_hw functions is used to create two
   instances of very similar variations of these functions. The
   variations are specified by the "multi" parameter which signifies
   that the rte_eth_rx_burst function should be called multiple times.
   The reason for this is that with the vector PMD, the maximum number
   of packets being returned is 32. If packets have been split in
   multiple mbufs then rte_eth_rx_burst might even receive fewer than
   32 packets.
   Some algorithms (like QoS) only work correctly if the dequeue step
   can find 32 packets, so a single call returning at most 32 packets
   is not always enough. */
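/* These constants are assumed here: MIN_PMD_RX mirrors the 32-packet
   maximum of the vector PMD described above, and PROX_L3/PROX_NDP only
   need to be distinct non-zero selectors for the l3_ndp parameter. */
#define MIN_PMD_RX	32
#define PROX_L3		1
#define PROX_NDP	2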
static uint16_t rx_pkt_hw_port_queue(struct port_queue *pq, struct rte_mbuf **mbufs, int multi)
{
	uint16_t nb_rx, n;

	nb_rx = rte_eth_rx_burst(pq->port, pq->queue, mbufs, MAX_PKT_BURST);
	if (multi) {
		n = nb_rx;
		while (n != 0 && MAX_PKT_BURST - nb_rx >= MIN_PMD_RX) {
			n = rte_eth_rx_burst(pq->port, pq->queue, mbufs + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > 64, "Received %d packets while expecting maximum %d\n", nb_rx, 64);
		}
	}
	return nb_rx;
}
static void next_port(struct rx_params_hw *rx_params_hw)
{
	++rx_params_hw->last_read_portid;
	if (unlikely(rx_params_hw->last_read_portid == rx_params_hw->nb_rxports)) {
		rx_params_hw->last_read_portid = 0;
	}
}
static void next_port_pow2(struct rx_params_hw *rx_params_hw)
{
	rx_params_hw->last_read_portid = (rx_params_hw->last_read_portid + 1) & rx_params_hw->rxport_mask;
}
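/* Dump or trace one received mbuf when runtime dumping is active: the
   packet is either logged locally or sent back through the input reply
   callback, and the per-task dump/trace counters are decremented. */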
static inline void dump_l3(struct task_base *tbase, struct rte_mbuf *mbuf)
{
	if (unlikely(tbase->aux->task_rt_dump.n_print_rx)) {
		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			plogdx_info(mbuf, "RX: ");
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;
			char tmp[128];
			int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
			int port_id = mbuf->port;
#else
			int port_id = mbuf->pkt.in_port;
#endif
			strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
					  rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, tmp, strlen);
			input->reply(input, rte_pktmbuf_mtod(mbuf, char *), rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, "\n", 1);
		}
		tbase->aux->task_rt_dump.n_print_rx--;
		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	if (unlikely(tbase->aux->task_rt_dump.n_trace)) {
		plogdx_info(mbuf, "RX: ");
		tbase->aux->task_rt_dump.n_trace--;
	}
}
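/* Divert ICMP and BGP packets to the master task. "skip" counts the
   packets consumed this way; the remaining data-plane packets are
   shifted down by *skip so that the burst stays contiguous. */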
static inline void handle_ipv4(struct task_base *tbase, struct rte_mbuf **mbufs, int i, prox_rte_ipv4_hdr *pip, int *skip)
{
	prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1);
	if (pip->next_proto_id == IPPROTO_ICMP) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_MASTER, mbufs[i]);
		(*skip)++;
	} else if ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP)) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_MASTER, mbufs[i]);
		(*skip)++;
	} else if (unlikely(*skip)) {
		mbufs[i - *skip] = mbufs[i];
	}
}
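/* Scan a received burst for control packets (ARP, and ICMP or BGP in
   IPv4, with or without a VLAN tag), forward them to the master task
   and compact the rest of the burst. Returns the number of packets
   removed from the burst. */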
static inline int handle_l3(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr)
{
	struct rte_mbuf **mbufs = *mbufs_ptr;
	int i;
	struct ether_hdr_arp *hdr_arp[MAX_PKT_BURST];
	prox_rte_ether_hdr *hdr;
	prox_rte_ipv4_hdr *pip;
	prox_rte_vlan_hdr *vlan;
	int skip = 0;

	for (i = 0; i < nb_rx; i++) {
		PREFETCH0(mbufs[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		hdr_arp[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *);
		PREFETCH0(hdr_arp[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) {
			hdr = (prox_rte_ether_hdr *)hdr_arp[i];
			pip = (prox_rte_ipv4_hdr *)(hdr + 1);
			handle_ipv4(tbase, mbufs, i, pip, &skip);
		} else {
			switch (hdr_arp[i]->ether_hdr.ether_type) {
			case ETYPE_VLAN:
				hdr = (prox_rte_ether_hdr *)hdr_arp[i];
				vlan = (prox_rte_vlan_hdr *)(hdr + 1);
				if (vlan->eth_proto == ETYPE_IPv4) {
					pip = (prox_rte_ipv4_hdr *)(vlan + 1);
					handle_ipv4(tbase, mbufs, i, pip, &skip);
				} else if (vlan->eth_proto == ETYPE_ARP) {
					dump_l3(tbase, mbufs[i]);
					tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
					skip++;
				}
				break;
			case ETYPE_ARP:
				dump_l3(tbase, mbufs[i]);
				tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
				skip++;
				break;
			default:
				if (unlikely(skip)) {
					mbufs[i - skip] = mbufs[i];
				}
				break;
			}
		}
	}
	return skip;
}
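/* IPv6 counterpart of handle_l3: ICMPv6 packets (which carry NDP) are
   diverted to the master task and the burst is compacted. */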
static inline int handle_ndp(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr)
{
	struct rte_mbuf **mbufs = *mbufs_ptr;
	prox_rte_ipv6_hdr *ipv6_hdr;
	int i;
	prox_rte_ether_hdr *hdr[MAX_PKT_BURST];
	int skip = 0;
	uint16_t vlan = 0;

	for (i = 0; i < nb_rx; i++) {
		PREFETCH0(mbufs[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		hdr[i] = rte_pktmbuf_mtod(mbufs[i], prox_rte_ether_hdr *);
		PREFETCH0(hdr[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		ipv6_hdr = prox_get_ipv6_hdr(hdr[i], rte_pktmbuf_pkt_len(mbufs[i]), &vlan);
		if (unlikely((ipv6_hdr) && (ipv6_hdr->proto == ICMPv6))) {
			dump_l3(tbase, mbufs[i]);
			tx_ring(tbase, tbase->l3.ctrl_plane_ring, NDP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
			skip++;
		} else if (unlikely(skip)) {
			mbufs[i - skip] = mbufs[i];
		}
	}
	return skip;
}
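/* Common body of the hardware rx functions: select the next port/queue
   in round-robin order, receive a burst, optionally divert L3 or NDP
   control packets, and update the task statistics. */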
static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi,
				void (*next)(struct rx_params_hw *rx_param_hw), int l3_ndp)
{
	uint8_t last_read_portid;
	uint16_t nb_rx;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	last_read_portid = tbase->rx_params_hw.last_read_portid;
	struct port_queue *pq = &tbase->rx_params_hw.rx_pq[last_read_portid];

	nb_rx = rx_pkt_hw_port_queue(pq, *mbufs_ptr, multi);
	next(&tbase->rx_params_hw);

	if (l3_ndp == PROX_L3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);
	else if (l3_ndp == PROX_NDP)
		skip = handle_ndp(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	if (likely(nb_rx > 0)) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx - skip;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
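/* Specialized variant for tasks that receive from a single port/queue,
   which saves the round-robin bookkeeping of rx_pkt_hw_param. */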
static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi, int l3_ndp)
{
	uint16_t nb_rx, n;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	nb_rx = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
				 tbase->rx_params_hw1.rx_pq.queue,
				 *mbufs_ptr, MAX_PKT_BURST);

	if (multi) {
		n = nb_rx;
		while ((n != 0) && (MAX_PKT_BURST - nb_rx >= MIN_PMD_RX)) {
			n = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
					     tbase->rx_params_hw1.rx_pq.queue,
					     *mbufs_ptr + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > 64, "Received %d packets while expecting maximum %d\n", nb_rx, 64);
		}
	}

	if (unlikely(nb_rx == 0)) {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}

	if (l3_ndp == PROX_L3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);
	else if (l3_ndp == PROX_NDP)
		skip = handle_ndp(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
	return nb_rx - skip;
}
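/* Entry points registered as task rx functions: every combination of a
   single queue (hw1) or several queues (hw), power-of-two port counts
   (pow2), repeated bursts (multi) and plain/L3/NDP handling expands to
   one of the two _param functions above. */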
uint16_t rx_pkt_hw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, 0);
}

uint16_t rx_pkt_hw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, 0);
}

uint16_t rx_pkt_hw_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, 0);
}

uint16_t rx_pkt_hw_pow2_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, 0);
}

uint16_t rx_pkt_hw_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_L3);
}

uint16_t rx_pkt_hw_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_NDP);
}

uint16_t rx_pkt_hw_pow2_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_L3);
}

uint16_t rx_pkt_hw_pow2_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_NDP);
}

uint16_t rx_pkt_hw1_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_L3);
}

uint16_t rx_pkt_hw1_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_NDP);
}

uint16_t rx_pkt_hw_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_L3);
}

uint16_t rx_pkt_hw_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_NDP);
}

uint16_t rx_pkt_hw_pow2_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_L3);
}

uint16_t rx_pkt_hw_pow2_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_NDP);
}

uint16_t rx_pkt_hw1_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_L3);
}

uint16_t rx_pkt_hw1_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_NDP);
}
/* The following functions implement ring access */
uint16_t ring_deq(struct rte_ring *r, struct rte_mbuf **mbufs)
{
	void **v_mbufs = (void **)mbufs;
#ifdef BRUTE_FORCE_RING /* assumed compile-time switch selecting all-or-nothing dequeue */
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST) < 0? 0 : MAX_RING_BURST;
#else
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#else
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST);
#else
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#endif
}
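/* Note on the variants above: the bulk dequeue is all-or-nothing (it
   returns MAX_RING_BURST mbufs or none), whereas the burst dequeue
   returns up to MAX_RING_BURST. DPDK 17.05 changed both calls to return
   the number of objects dequeued and to take an extra parameter. */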
uint16_t rx_pkt_sw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = lr + 1 == tbase->rx_params_sw.nb_rxrings? 0 : lr + 1;
	} while(!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
/* Same as rx_pkt_sw except with a mask for the number of receive
   rings (can only be used if nb_rxring is a power of 2). */
uint16_t rx_pkt_sw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = (lr + 1) & tbase->rx_params_sw.rxrings_mask;
	} while(!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
uint16_t rx_pkt_self(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	uint16_t nb_rx = tbase->ws_mbuf->idx[0].nb_rx;

	if (nb_rx) {
		tbase->ws_mbuf->idx[0].nb_rx = 0;
		*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
/* Used for tasks that do not receive packets (e.g. packet
   generation). Always returns 1 but never returns packets and does
   not increment statistics. This allows tasks that never receive
   packets to share the same code path as tasks that do. */
uint16_t rx_pkt_dummy(__attribute__((unused)) struct task_base *tbase,
		      __attribute__((unused)) struct rte_mbuf ***mbufs)
{
	return 1;
}
/* After the system has been configured, it is known if there is only
   one RX ring. If this is the case, a more specialized version of the
   function above can be used to save cycles. */
uint16_t rx_pkt_sw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint16_t nb_rx = ring_deq(tbase->rx_params_sw1.rx_ring, *mbufs);

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
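/* Call the rx function that was active before the current wrapper was
   installed. rx_prev_idx acts as a stack index, so stacked wrappers
   (dump, trace, distr, bw, tsc) each invoke the next one down. */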
static uint16_t call_prev_rx_pkt(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret;

	tbase->aux->rx_prev_idx++;
	ret = tbase->aux->rx_pkt_prev[tbase->aux->rx_prev_idx - 1](tbase, mbufs);
	tbase->aux->rx_prev_idx--;
	return ret;
}
/* Only used when there are packets to be dumped. This function is
   meant as a debugging tool and is therefore not optimized. When the
   number of packets to dump falls back to 0, the original (optimized)
   rx function is restored. This makes it possible to support dumping
   packets without any performance impact when the feature is unused. */
uint16_t rx_pkt_dump(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (ret) {
		uint32_t n_dump = tbase->aux->task_rt_dump.n_print_rx;
		n_dump = ret < n_dump? ret : n_dump;

		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			for (uint32_t i = 0; i < n_dump; ++i) {
				plogdx_info((*mbufs)[i], "RX: ");
			}
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;

			for (uint32_t i = 0; i < n_dump; ++i) {
				/* TODO: Execute callback with full
				   data in a single call. */
				char tmp[128];
				int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
				int port_id = ((*mbufs)[i])->port;
#else
				int port_id = ((*mbufs)[i])->pkt.in_port;
#endif
				strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
						  rte_pktmbuf_pkt_len((*mbufs)[i]));
				input->reply(input, tmp, strlen);
				input->reply(input, rte_pktmbuf_mtod((*mbufs)[i], char *), rte_pktmbuf_pkt_len((*mbufs)[i]));
				input->reply(input, "\n", 1);
			}
		}

		tbase->aux->task_rt_dump.n_print_rx -= n_dump;
		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	return ret;
}
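/* Copy the first bytes of up to MAX_RING_BURST received packets into
   the per-task trace buffers; the TX side prints them and clears the
   trace state once n_trace reaches zero. */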
uint16_t rx_pkt_trace(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	tbase->aux->task_rt_dump.cur_trace = 0;
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (ret) {
		uint32_t n_trace = tbase->aux->task_rt_dump.n_trace;
		n_trace = ret < n_trace? ret : n_trace;
		n_trace = n_trace <= MAX_RING_BURST ? n_trace : MAX_RING_BURST;

		for (uint32_t i = 0; i < n_trace; ++i) {
			uint8_t *pkt = rte_pktmbuf_mtod((*mbufs)[i], uint8_t *);
			rte_memcpy(tbase->aux->task_rt_dump.pkt_cpy[i], pkt, sizeof(tbase->aux->task_rt_dump.pkt_cpy[i]));
			tbase->aux->task_rt_dump.pkt_cpy_len[i] = rte_pktmbuf_pkt_len((*mbufs)[i]);
			tbase->aux->task_rt_dump.pkt_mbuf_addr[i] = (*mbufs)[i];
		}
		tbase->aux->task_rt_dump.cur_trace += n_trace;
		tbase->aux->task_rt_dump.n_trace -= n_trace;
		/* Unset by TX when n_trace = 0 */
	}
	return ret;
}
/* Gather the distribution of the number of packets that have been
   received from one RX call. Since the value is only modified by the
   task that receives the packet, no atomic operation is needed. */
uint16_t rx_pkt_distr(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (likely(ret < RX_BUCKET_SIZE))
		tbase->aux->rx_bucket[ret]++;
	else
		tbase->aux->rx_bucket[RX_BUCKET_SIZE - 1]++;
	return ret;
}
uint16_t rx_pkt_bw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint32_t tot_bytes = 0;

	for (uint16_t i = 0; i < ret; ++i) {
		tot_bytes += mbuf_wire_size((*mbufs)[i]);
	}
	TASK_STATS_ADD_RX_BYTES(&tbase->aux->stats, tot_bytes);
	return ret;
}
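/* Timestamp the call to the previous rx function so that the cycles
   spent receiving packets can be reported per task. */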
uint16_t rx_pkt_tsc(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint64_t before = rte_rdtsc();
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint64_t after = rte_rdtsc();

	tbase->aux->tsc_rx.before = before;
	tbase->aux->tsc_rx.after = after;
	return ret;
}