// Copyright (c) 2010-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_version.h>

#include "rx_pkt.h"
#include "task_base.h"
#include "stats.h"
#include "log.h"
#include "mbuf_utils.h"
#include "prefetch.h"
#include "arp.h"
#include "tx_pkt.h"
#include "handle_master.h"
#include "input.h"
#include "prox_ipv6.h" /* Needed for callback on dump */

#define TCP_PORT_BGP	rte_cpu_to_be_16(179)
/* The _param versions of the rx_pkt_hw functions are used to create
   two instances of very similar variations of these functions. The
   variations are specified by the "multi" parameter, which signifies
   that the rte_eth_rx_burst function should be called multiple times.
   The reason for this is that with the vector PMD, the maximum number
   of packets being returned is 32. If packets have been split into
   multiple mbufs then rte_eth_rx_burst might even return less than
   32 packets.
   Some algorithms (like QoS) only work correctly if more than 32
   packets are received per call when the dequeue step involves
   finding 32 packets. */
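/* Helper receiving one burst from a specific port/queue pair. With "multi"
   set, it keeps calling rte_eth_rx_burst in MIN_PMD_RX-sized chunks (32, per
   the vector-PMD limit described above) until no more packets arrive or
   MAX_PKT_BURST would be exceeded. MIN_PMD_RX, PROX_L3 and PROX_NDP are
   assumed to be defined elsewhere in the full source. */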
static uint16_t rx_pkt_hw_port_queue(struct port_queue *pq, struct rte_mbuf **mbufs, int multi)
{
	uint16_t nb_rx, n;

	nb_rx = rte_eth_rx_burst(pq->port, pq->queue, mbufs, MAX_PKT_BURST);

	if (multi) {
		n = nb_rx;
		while (n != 0 && MAX_PKT_BURST - nb_rx >= MIN_PMD_RX) {
			n = rte_eth_rx_burst(pq->port, pq->queue, mbufs + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > 64, "Received %d packets while expecting maximum %d\n", n, MIN_PMD_RX);
		}
	}
	return nb_rx;
}
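/* Round-robin selection of the next RX port to poll; the _pow2 variant
   replaces the wrap-around branch with a mask and requires the number of
   RX ports to be a power of two. */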
static void next_port(struct rx_params_hw *rx_params_hw)
{
	++rx_params_hw->last_read_portid;
	if (unlikely(rx_params_hw->last_read_portid == rx_params_hw->nb_rxports)) {
		rx_params_hw->last_read_portid = 0;
	}
}

static void next_port_pow2(struct rx_params_hw *rx_params_hw)
{
	rx_params_hw->last_read_portid = (rx_params_hw->last_read_portid + 1) & rx_params_hw->rxport_mask;
}
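/* Dump a diverted control-plane packet when dumping or tracing is active,
   either to the log or, if a command input with a reply callback is
   attached, back to that input. */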
static inline void dump_l3(struct task_base *tbase, struct rte_mbuf *mbuf)
{
	if (unlikely(tbase->aux->task_rt_dump.n_print_rx)) {
		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			plogdx_info(mbuf, "RX: ");
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;
			char tmp[128];
			int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
			int port_id = mbuf->port;
#else
			int port_id = mbuf->pkt.in_port;
#endif
			strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
					rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, tmp, strlen);
			input->reply(input, rte_pktmbuf_mtod(mbuf, char *), rte_pktmbuf_pkt_len(mbuf));
			input->reply(input, "\n", 1);
		}
		tbase->aux->task_rt_dump.n_print_rx--;
		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	if (unlikely(tbase->aux->task_rt_dump.n_trace)) {
		plogdx_info(mbuf, "RX: ");
		tbase->aux->task_rt_dump.n_trace--;
	}
}
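/* Divert ICMP and BGP (TCP port 179) packets to the master task over the
   control-plane ring; for regular packets, compact the burst over the
   slots freed by previously diverted packets. */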
static inline void handle_ipv4(struct task_base *tbase, struct rte_mbuf **mbufs, int i, prox_rte_ipv4_hdr *pip, int *skip)
{
	prox_rte_tcp_hdr *tcp = (prox_rte_tcp_hdr *)(pip + 1);
	if (pip->next_proto_id == IPPROTO_ICMP) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, ICMP_TO_MASTER, mbufs[i]);
		(*skip)++;
	} else if ((tcp->src_port == TCP_PORT_BGP) || (tcp->dst_port == TCP_PORT_BGP)) {
		dump_l3(tbase, mbufs[i]);
		tx_ring(tbase, tbase->l3.ctrl_plane_ring, BGP_TO_MASTER, mbufs[i]);
		(*skip)++;
	} else if (unlikely(*skip)) {
		mbufs[i - *skip] = mbufs[i];
	}
}
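/* L3 sub-mode: scan the received burst for control-plane packets (ARP,
   ICMP, BGP, possibly behind a VLAN tag), hand them to the master task and
   compact the remaining data-plane packets. Returns the number of packets
   removed from the burst. */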
static inline int handle_l3(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr)
{
	struct rte_mbuf **mbufs = *mbufs_ptr;
	int i;
	struct ether_hdr_arp *hdr_arp[MAX_PKT_BURST];
	prox_rte_ether_hdr *hdr;
	prox_rte_ipv4_hdr *pip;
	prox_rte_vlan_hdr *vlan;
	int skip = 0;

	for (i = 0; i < nb_rx; i++) {
		PREFETCH0(mbufs[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		hdr_arp[i] = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr_arp *);
		PREFETCH0(hdr_arp[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		if (likely(hdr_arp[i]->ether_hdr.ether_type == ETYPE_IPv4)) {
			hdr = (prox_rte_ether_hdr *)hdr_arp[i];
			pip = (prox_rte_ipv4_hdr *)(hdr + 1);
			handle_ipv4(tbase, mbufs, i, pip, &skip);
		} else {
			switch (hdr_arp[i]->ether_hdr.ether_type) {
			case ETYPE_VLAN:
				hdr = (prox_rte_ether_hdr *)hdr_arp[i];
				vlan = (prox_rte_vlan_hdr *)(hdr + 1);
				if (vlan->eth_proto == ETYPE_IPv4) {
					pip = (prox_rte_ipv4_hdr *)(vlan + 1);
					handle_ipv4(tbase, mbufs, i, pip, &skip);
				} else if (vlan->eth_proto == ETYPE_ARP) {
					dump_l3(tbase, mbufs[i]);
					tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
					skip++;
				}
				break;
			case ETYPE_ARP:
				dump_l3(tbase, mbufs[i]);
				tx_ring(tbase, tbase->l3.ctrl_plane_ring, ARP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
				skip++;
				break;
			default:
				if (unlikely(skip)) {
					mbufs[i - skip] = mbufs[i];
				}
				break;
			}
		}
	}
	return skip;
}
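/* NDP sub-mode: same principle for IPv6, diverting ICMPv6 packets to the
   master task. Returns the number of packets removed from the burst. */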
static inline int handle_ndp(struct task_base *tbase, uint16_t nb_rx, struct rte_mbuf ***mbufs_ptr)
{
	struct rte_mbuf **mbufs = *mbufs_ptr;
	int i;
	prox_rte_ether_hdr *hdr[MAX_PKT_BURST];
	int skip = 0;

	for (i = 0; i < nb_rx; i++) {
		PREFETCH0(mbufs[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		hdr[i] = rte_pktmbuf_mtod(mbufs[i], prox_rte_ether_hdr *);
		PREFETCH0(hdr[i]);
	}
	for (i = 0; i < nb_rx; i++) {
		prox_rte_ipv6_hdr *ipv6_hdr = (prox_rte_ipv6_hdr *)(hdr[i] + 1);
		if (unlikely((hdr[i]->ether_type == ETYPE_IPv6) && (ipv6_hdr->proto == ICMPv6))) {
			dump_l3(tbase, mbufs[i]);
			tx_ring(tbase, tbase->l3.ctrl_plane_ring, NDP_PKT_FROM_NET_TO_MASTER, mbufs[i]);
			skip++;
		} else if (unlikely(skip)) {
			mbufs[i - skip] = mbufs[i];
		}
	}
	return skip;
}
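/* Generic hardware RX path: select the next port/queue, receive a burst,
   optionally filter L3/NDP control packets, and account RX, non-dataplane
   and idle statistics. */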
static uint16_t rx_pkt_hw_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi,
		void (*next)(struct rx_params_hw *rx_param_hw), int l3_ndp)
{
	uint8_t last_read_portid;
	uint16_t nb_rx;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	last_read_portid = tbase->rx_params_hw.last_read_portid;
	struct port_queue *pq = &tbase->rx_params_hw.rx_pq[last_read_portid];

	nb_rx = rx_pkt_hw_port_queue(pq, *mbufs_ptr, multi);
	next(&tbase->rx_params_hw);

	if (l3_ndp == PROX_L3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);
	else if (l3_ndp == PROX_NDP)
		skip = handle_ndp(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	if (likely(nb_rx > 0)) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx - skip;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
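/* Specialized variant for tasks reading from a single port/queue pair,
   avoiding the port-selection indirection of rx_pkt_hw_param. */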
static inline uint16_t rx_pkt_hw1_param(struct task_base *tbase, struct rte_mbuf ***mbufs_ptr, int multi, int l3_ndp)
{
	uint16_t nb_rx, n;
	int skip = 0;

	START_EMPTY_MEASSURE();
	*mbufs_ptr = tbase->ws_mbuf->mbuf[0] +
		(RTE_ALIGN_CEIL(tbase->ws_mbuf->idx[0].prod, 2) & WS_MBUF_MASK);

	nb_rx = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
				 tbase->rx_params_hw1.rx_pq.queue,
				 *mbufs_ptr, MAX_PKT_BURST);

	if (multi) {
		n = nb_rx;
		while ((n != 0) && (MAX_PKT_BURST - nb_rx >= MIN_PMD_RX)) {
			n = rte_eth_rx_burst(tbase->rx_params_hw1.rx_pq.port,
					     tbase->rx_params_hw1.rx_pq.queue,
					     *mbufs_ptr + nb_rx, MIN_PMD_RX);
			nb_rx += n;
			PROX_PANIC(nb_rx > 64, "Received %d packets while expecting maximum %d\n", n, MIN_PMD_RX);
		}
	}

	if (unlikely(nb_rx == 0)) {
		TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
		return 0;
	}

	if (l3_ndp == PROX_L3)
		skip = handle_l3(tbase, nb_rx, mbufs_ptr);
	else if (l3_ndp == PROX_NDP)
		skip = handle_ndp(tbase, nb_rx, mbufs_ptr);

	if (skip)
		TASK_STATS_ADD_RX_NON_DP(&tbase->aux->stats, skip);
	TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
	return nb_rx - skip;
}
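/* Thin wrappers binding the _param functions to the supported combinations
   of multi-burst, port-selection strategy and L3/NDP handling; these are
   the entry points installed as a task's RX function. */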
uint16_t rx_pkt_hw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, 0);
}

uint16_t rx_pkt_hw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, 0);
}

uint16_t rx_pkt_hw_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, 0);
}

uint16_t rx_pkt_hw_pow2_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, 0);
}

uint16_t rx_pkt_hw1_multi(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, 0);
}

uint16_t rx_pkt_hw_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_L3);
}

uint16_t rx_pkt_hw_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port, PROX_NDP);
}

uint16_t rx_pkt_hw_pow2_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_L3);
}

uint16_t rx_pkt_hw_pow2_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 0, next_port_pow2, PROX_NDP);
}

uint16_t rx_pkt_hw1_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_L3);
}

uint16_t rx_pkt_hw1_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 0, PROX_NDP);
}

uint16_t rx_pkt_hw_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_L3);
}

uint16_t rx_pkt_hw_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port, PROX_NDP);
}

uint16_t rx_pkt_hw_pow2_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_L3);
}

uint16_t rx_pkt_hw_pow2_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw_param(tbase, mbufs, 1, next_port_pow2, PROX_NDP);
}

uint16_t rx_pkt_hw1_multi_l3(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_L3);
}

uint16_t rx_pkt_hw1_multi_ndp(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	return rx_pkt_hw1_param(tbase, mbufs, 1, PROX_NDP);
}
/* The following functions implement ring access */
uint16_t ring_deq(struct rte_ring *r, struct rte_mbuf **mbufs)
{
	void **v_mbufs = (void **)mbufs;
#ifdef BRUTE_FORCE_RING_DEQUEUE	/* guard name assumed: bulk (all-or-nothing) dequeue */
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST) < 0? 0 : MAX_RING_BURST;
#else
	return rte_ring_sc_dequeue_bulk(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#else
#if RTE_VERSION < RTE_VERSION_NUM(17,5,0,1)
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST);
#else
	return rte_ring_sc_dequeue_burst(r, v_mbufs, MAX_RING_BURST, NULL);
#endif
#endif
}
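/* Software RX: dequeue from the task's RX rings in round-robin order,
   stopping as soon as one ring delivers packets or after all rings have
   been tried once. */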
uint16_t rx_pkt_sw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = lr + 1 == tbase->rx_params_sw.nb_rxrings? 0 : lr + 1;
	} while (!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
/* Same as rx_pkt_sw except with a mask for the number of receive
   rings (can only be used if nb_rxring is a power of 2). */
uint16_t rx_pkt_sw_pow2(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint8_t lr = tbase->rx_params_sw.last_read_ring;
	uint16_t nb_rx;

	do {
		nb_rx = ring_deq(tbase->rx_params_sw.rx_rings[lr], *mbufs);
		lr = (lr + 1) & tbase->rx_params_sw.rxrings_mask;
	} while (!nb_rx && lr != tbase->rx_params_sw.last_read_ring);

	tbase->rx_params_sw.last_read_ring = lr;

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
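/* Receive packets that were queued locally in the task's ws_mbuf buffer
   (ws_mbuf->idx[0].nb_rx), presumably by the task transmitting to itself;
   no PMD or ring access is involved. */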
uint16_t rx_pkt_self(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	uint16_t nb_rx = tbase->ws_mbuf->idx[0].nb_rx;

	if (nb_rx) {
		tbase->ws_mbuf->idx[0].nb_rx = 0;
		*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
/* Used for tasks that do not receive packets (e.g. packet
   generation). Always returns 1 but never returns packets and does
   not increment statistics. This function makes it possible to use
   the same code path as for tasks that actually receive packets. */
uint16_t rx_pkt_dummy(__attribute__((unused)) struct task_base *tbase,
		      __attribute__((unused)) struct rte_mbuf ***mbufs)
{
	return 1;
}
/* After the system has been configured, it is known if there is only
   one RX ring. If this is the case, a more specialized version of the
   function above can be used to save cycles. */
uint16_t rx_pkt_sw1(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	START_EMPTY_MEASSURE();
	*mbufs = tbase->ws_mbuf->mbuf[0] + (tbase->ws_mbuf->idx[0].prod & WS_MBUF_MASK);
	uint16_t nb_rx = ring_deq(tbase->rx_params_sw1.rx_ring, *mbufs);

	if (nb_rx != 0) {
		TASK_STATS_ADD_RX(&tbase->aux->stats, nb_rx);
		return nb_rx;
	}
	TASK_STATS_ADD_IDLE(&tbase->aux->stats, rte_rdtsc() - cur_tsc);
	return 0;
}
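/* Invoke the RX function that was active before the current debugging or
   statistics wrapper was installed; used by the wrappers below. */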
static uint16_t call_prev_rx_pkt(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret;

	tbase->aux->rx_prev_idx++;
	ret = tbase->aux->rx_pkt_prev[tbase->aux->rx_prev_idx - 1](tbase, mbufs);
	tbase->aux->rx_prev_idx--;

	return ret;
}
/* Only used when there are packets to be dumped. This function is
   meant as a debugging tool and is therefore not optimized. When the
   number of packets to dump falls back to 0, the original (optimized)
   rx function is restored. This makes it possible to support dumping
   packets without any performance impact when the feature is not used. */
uint16_t rx_pkt_dump(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (ret) {
		uint32_t n_dump = tbase->aux->task_rt_dump.n_print_rx;
		n_dump = ret < n_dump? ret : n_dump;

		if ((tbase->aux->task_rt_dump.input == NULL) || (tbase->aux->task_rt_dump.input->reply == NULL)) {
			for (uint32_t i = 0; i < n_dump; ++i) {
				plogdx_info((*mbufs)[i], "RX: ");
			}
		} else {
			struct input *input = tbase->aux->task_rt_dump.input;

			for (uint32_t i = 0; i < n_dump; ++i) {
				/* TODO: Execute callback with full
				   data in a single call. */
				char tmp[128];
				int strlen;
#if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
				int port_id = ((*mbufs)[i])->port;
#else
				int port_id = ((*mbufs)[i])->pkt.in_port;
#endif
				strlen = snprintf(tmp, sizeof(tmp), "pktdump,%d,%d\n", port_id,
						rte_pktmbuf_pkt_len((*mbufs)[i]));
				input->reply(input, tmp, strlen);
				input->reply(input, rte_pktmbuf_mtod((*mbufs)[i], char *), rte_pktmbuf_pkt_len((*mbufs)[i]));
				input->reply(input, "\n", 1);
			}
		}

		tbase->aux->task_rt_dump.n_print_rx -= n_dump;

		if (0 == tbase->aux->task_rt_dump.n_print_rx) {
			task_base_del_rx_pkt_function(tbase, rx_pkt_dump);
		}
	}
	return ret;
}
uint16_t rx_pkt_trace(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	tbase->aux->task_rt_dump.cur_trace = 0;
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (ret) {
		uint32_t n_trace = tbase->aux->task_rt_dump.n_trace;
		n_trace = ret < n_trace? ret : n_trace;
		n_trace = n_trace <= MAX_RING_BURST ? n_trace : MAX_RING_BURST;

		for (uint32_t i = 0; i < n_trace; ++i) {
			uint8_t *pkt = rte_pktmbuf_mtod((*mbufs)[i], uint8_t *);
			rte_memcpy(tbase->aux->task_rt_dump.pkt_cpy[i], pkt, sizeof(tbase->aux->task_rt_dump.pkt_cpy[i]));
			tbase->aux->task_rt_dump.pkt_cpy_len[i] = rte_pktmbuf_pkt_len((*mbufs)[i]);
			tbase->aux->task_rt_dump.pkt_mbuf_addr[i] = (*mbufs)[i];
		}
		tbase->aux->task_rt_dump.cur_trace += n_trace;

		tbase->aux->task_rt_dump.n_trace -= n_trace;
		/* Unset by TX when n_trace = 0 */
	}
	return ret;
}
/* Gather the distribution of the number of packets that have been
   received from one RX call. Since the value is only modified by the
   task that receives the packet, no atomic operation is needed. */
uint16_t rx_pkt_distr(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);

	if (likely(ret < RX_BUCKET_SIZE))
		tbase->aux->rx_bucket[ret]++;
	else
		tbase->aux->rx_bucket[RX_BUCKET_SIZE - 1]++;
	return ret;
}
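/* Account received bytes (as computed by mbuf_wire_size()) on top of the
   underlying RX function. */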
uint16_t rx_pkt_bw(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint32_t tot_bytes = 0;

	for (uint16_t i = 0; i < ret; ++i) {
		tot_bytes += mbuf_wire_size((*mbufs)[i]);
	}

	TASK_STATS_ADD_RX_BYTES(&tbase->aux->stats, tot_bytes);

	return ret;
}
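/* Record the TSC immediately before and after the underlying RX call so
   the cycle cost of packet reception can be reported. */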
uint16_t rx_pkt_tsc(struct task_base *tbase, struct rte_mbuf ***mbufs)
{
	uint64_t before = rte_rdtsc();
	uint16_t ret = call_prev_rx_pkt(tbase, mbufs);
	uint64_t after = rte_rdtsc();

	tbase->aux->tsc_rx.before = before;
	tbase->aux->tsc_rx.after = after;

	return ret;
}