2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
22 #include <rte_cycles.h>
23 #include <rte_atomic.h>
24 #include <rte_table_hash.h>
25 #include <rte_memzone.h>
26 #include <rte_errno.h>
28 #include "prox_malloc.h"
36 #include "prox_args.h"
37 #include "prox_assert.h"
39 #include "prox_shared.h"
40 #include "prox_port_cfg.h"
42 #include "hash_utils.h"
43 #include "handle_lb_net.h"
44 #include "prox_cksum.h"
45 #include "thread_nop.h"
46 #include "thread_generic.h"
47 #include "thread_pipeline.h"
49 #include "handle_master.h"
/* For DPDK releases older than 1.8.0, RTE_CACHE_LINE_SIZE is provided as an
   alias of CACHE_LINE_SIZE. */
51 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
52 #define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
/* Starts out as 0xff; nothing in the visible part of this file writes it. */
55 uint8_t lb_nb_txrings = 0xff;
/* Table of rings indexed by core * MAX_TASKS_PER_CORE + task.
   init_ring_between_tasks() stores a ring here when the sending core is the
   master core. */
56 struct rte_ring *ctrl_rings[RTE_MAX_LCORE*MAX_TASKS_PER_CORE];
/*
 * Print the command-line help text through plog_info().
 *
 * prgname: program name; presumably what fills the %s of the "Usage:" line
 *          (the argument list of the call is not visible in this excerpt).
 *
 * The function is declared noreturn, so callers must not expect control back.
 *
 * NOTE(review): the synopsis line only lists -f, -a, -e, -m, -s, -i, -w, -u
 * and -t, while the option table below also documents -l, -p, -o, -v, -n,
 * -q, -k, -d, -z and -r.
 */
58 static void __attribute__((noreturn)) prox_usage(const char *prgname)
60 plog_info("\nUsage: %s [-f CONFIG_FILE] [-a|-e] [-m|-s|-i] [-w DEF] [-u] [-t]\n"
61 "\t-f CONFIG_FILE : configuration file to load, ./prox.cfg by default\n"
62 "\t-l LOG_FILE : log file name, ./prox.log by default\n"
63 "\t-p : include PID in log file name if default log file is used\n"
64 "\t-o DISPLAY: Set display to use, can be 'curses' (default), 'cli' or 'none'\n"
65 "\t-v verbosity : initial logging verbosity\n"
66 "\t-a : autostart all cores (by default)\n"
67 "\t-e : don't autostart\n"
68 "\t-n : Create NULL devices instead of using PCI devices, useful together with -i\n"
69 "\t-m : list supported task modes and exit\n"
70 "\t-s : check configuration file syntax and exit\n"
71 "\t-i : check initialization sequence and exit\n"
72 "\t-u : Listen on UDS /tmp/prox.sock\n"
73 "\t-t : Listen on TCP port 8474\n"
74 "\t-q : Pass argument to Lua interpreter, useful to define variables\n"
75 "\t-w : define variable using syntax varname=value\n"
76 "\t takes precedence over variables defined in CONFIG_FILE\n"
77 "\t-k : Log statistics to file \"stats_dump\" in current directory\n"
78 "\t-d : Run as daemon, the parent process will block until PROX is not initialized\n"
79 "\t-z : Ignore CPU topology, implies -i\n"
80 "\t-r : Change initial screen refresh rate. If set to a lower than 0.001 seconds,\n"
81 "\t screen refreshing will be disabled\n"
/*
 * For every core returned by prox_core_next(), inspect the thread function
 * (task_init->thread_x) and the sub mode of each of its tasks, then choose
 * the thread function of the core itself.
 *
 * Panics when a core mixes pipeline tasks with generic tasks; a task whose
 * sub mode is "l3" counts as generic.
 */
86 static void check_mixed_normal_pipeline(void)
88 struct lcore_cfg *lconf = NULL;
/* -1 wraps to the largest uint32_t; presumably the "start before the first
   core" value expected by prox_core_next() - TODO confirm. */
89 uint32_t lcore_id = -1;
91 while (prox_core_next(&lcore_id, 0) == 0) {
92 lconf = &lcore_cfg[lcore_id];
/* Stays 1 only while every task seen so far runs thread_nop and is not l3. */
94 int all_thread_nop = 1;
98 for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
99 struct task_args *targ = &lconf->targs[task_id];
100 l3 = !strcmp("l3", targ->sub_mode_str);
101 all_thread_nop = all_thread_nop && !l3 &&
102 targ->task_init->thread_x == thread_nop;
104 pipeline = pipeline || targ->task_init->thread_x == thread_pipeline;
105 generic = generic || targ->task_init->thread_x == thread_generic || l3;
107 PROX_PANIC(generic && pipeline, "Can't run both pipeline and normal thread on same core\n");
/* The conditions selecting between the two assignments below are not visible
   in this excerpt; presumably thread_nop is used when all_thread_nop is
   still set and thread_generic when generic is set. */
110 lconf->thread_x = thread_nop;
112 lconf->thread_x = thread_generic;
/*
 * Panic when a task has RX ports configured (nb_rxports != 0) although its
 * mode carries the TASK_FEATURE_NO_RX flag.
 */
117 static void check_zero_rx(void)
119 struct lcore_cfg *lconf = NULL;
120 struct task_args *targ;
122 while (core_targ_next(&lconf, &targ, 0) == 0) {
123 if (targ->nb_rxports != 0) {
124 PROX_PANIC(task_init_flag_set(targ->task_init, TASK_FEATURE_NO_RX),
125 "\tCore %u task %u: rx_ports configured while mode %s does not use it\n", lconf->id, targ->id, targ->task_init->mode_str);
/*
 * Consistency checks on the receive side of every task, in two passes:
 *
 *  1. A task flagged TASK_ARG_RX_RING must have a ring in rx_rings[0] or be
 *     the target of an optimized-away ring (tx_opt_ring_task); a task with
 *     neither RX ports nor RX rings must run a mode flagged
 *     TASK_FEATURE_NO_RX.
 *  2. For tasks in the "l3" sub mode: at least one RX or TX port must be
 *     configured, every port they receive from needs an L3 task transmitting
 *     to it, and the port they transmit to needs an L3 task receiving from it.
 *
 * Any violation ends in PROX_PANIC.
 *
 * NOTE(review): only tx_port_queue[0] is examined on the transmit side, and
 * `port` is not used in the lines visible here.
 */
130 static void check_missing_rx(void)
132 struct lcore_cfg *lconf = NULL, *rx_lconf = NULL, *tx_lconf = NULL;
133 struct task_args *targ, *rx_targ = NULL, *tx_targ = NULL;
134 struct prox_port_cfg *port;
135 uint8_t port_id, rx_port_id, ok;
/* Pass 1: every task needs a usable receive source. */
137 while (core_targ_next(&lconf, &targ, 0) == 0) {
138 PROX_PANIC((targ->flags & TASK_ARG_RX_RING) && targ->rx_rings[0] == 0 && !targ->tx_opt_ring_task,
139 "Configuration Error - Core %u task %u Receiving from ring, but nobody xmitting to this ring\n", lconf->id, targ->id);
140 if (targ->nb_rxports == 0 && targ->nb_rxrings == 0) {
141 PROX_PANIC(!task_init_flag_set(targ->task_init, TASK_FEATURE_NO_RX),
142 "\tCore %u task %u: no rx_ports and no rx_rings configured while required by mode %s\n", lconf->id, targ->id, targ->task_init->mode_str);
/* Pass 2: L3 sub mode pairing between receiving and transmitting tasks. */
147 while (core_targ_next(&lconf, &targ, 0) == 0) {
148 if (strcmp(targ->sub_mode_str, "l3") != 0)
151 PROX_PANIC((targ->nb_rxports == 0) && (targ->nb_txports == 0), "L3 task must have a RX or a TX port\n");
152 // If the L3 sub_mode receives from a port, check that there is at least one core/task
153 // transmitting to this port in L3 sub_mode
154 for (uint8_t i = 0; i < targ->nb_rxports; ++i) {
155 rx_port_id = targ->rx_port_queue[i].port;
158 while (core_targ_next(&tx_lconf, &tx_targ, 0) == 0) {
159 if ((port_id = tx_targ->tx_port_queue[0].port) == OUT_DISCARD)
161 if ((rx_port_id == port_id) && (tx_targ->flags & TASK_ARG_L3)){
166 PROX_PANIC(ok == 0, "RX L3 sub mode for port %d on core %d task %d, but no core/task transmitting on that port\n", rx_port_id, lconf->id, targ->id);
169 // If the L3 sub_mode transmits to a port, check that there is at least one core/task
170 // receiving from that port in L3 sub_mode.
171 if ((port_id = targ->tx_port_queue[0].port) == OUT_DISCARD)
175 plog_info("\tCore %d task %d transmitting to port %d in L3 mode\n", lconf->id, targ->id, port_id);
176 while (core_targ_next(&rx_lconf, &rx_targ, 0) == 0) {
177 for (uint8_t i = 0; i < rx_targ->nb_rxports; ++i) {
178 rx_port_id = rx_targ->rx_port_queue[i].port;
179 if ((rx_port_id == port_id) && (rx_targ->flags & TASK_ARG_L3)){
185 plog_info("\tCore %d task %d has found core %d task %d receiving from port %d\n", lconf->id, targ->id, rx_lconf->id, rx_targ->id, port_id);
189 PROX_PANIC(ok == 0, "L3 sub mode for port %d on core %d task %d, but no core/task receiving on that port\n", port_id, lconf->id, targ->id);
/*
 * Entry point for the configuration consistency checks; called from
 * init_lcores(). Only the call to check_mixed_normal_pipeline() is visible in
 * this excerpt.
 */
193 static void check_cfg_consistent(void)
197 check_mixed_normal_pipeline();
/*
 * Log, for every task, the address of each of its RX rings together with the
 * owning core id, task id and ring index.
 */
200 static void plog_all_rings(void)
202 struct lcore_cfg *lconf = NULL;
203 struct task_args *targ;
205 while (core_targ_next(&lconf, &targ, 0) == 0) {
206 for (uint8_t ring_idx = 0; ring_idx < targ->nb_rxrings; ++ring_idx) {
207 plog_info("\tCore %u, task %u, rx_ring[%u] %p\n", lconf->id, targ->id, ring_idx, targ->rx_rings[ring_idx]);
/*
 * Recursively compare the state of a task-init flag along a chain of tasks.
 *
 * targ:   task to start from; its prev_tasks[] entries are visited
 *         recursively.
 * flag:   feature flag passed to task_init_flag_set().
 * is_set: state (0 or 1) that the flag is compared against.
 *
 * The return statements are not visible in this excerpt. Callers use the
 * result as a boolean; judging by their log messages, a non-zero result means
 * that some task in the chain has the flag in state is_set - TODO confirm.
 */
212 static int chain_flag_state(struct task_args *targ, uint64_t flag, int is_set)
214 if (task_init_flag_set(targ->task_init, flag) == is_set)
219 for (uint32_t i = 0; i < targ->n_prev_tasks; ++i) {
220 ret = chain_flag_state(targ->prev_tasks[i], flag, is_set);
/*
 * Assign a TX queue on every port the task transmits to and derive the
 * per-port txq_flags from the feature flags of the task chain.
 *
 * targ:   task whose tx_port_queue[] entries are configured.
 * socket: socket of the core running the task; only used to warn when it
 *         differs from the socket of the device.
 *
 * Panics when a TX port entry is OUT_DISCARD or refers to an inactive port.
 */
227 static void configure_if_tx_queues(struct task_args *targ, uint8_t socket)
231 for (uint8_t i = 0; i < targ->nb_txports; ++i) {
232 if_port = targ->tx_port_queue[i].port;
234 PROX_PANIC(if_port == OUT_DISCARD, "port misconfigured, exiting\n");
236 PROX_PANIC(!prox_port_cfg[if_port].active, "\tPort %u not used, skipping...\n", if_port);
238 int dsocket = prox_port_cfg[if_port].socket;
239 if (dsocket != -1 && dsocket != socket) {
240 plog_warn("TX core on socket %d while device on socket %d\n", socket, dsocket);
/* A port without a tx_ring name gets a fresh queue per transmitting task;
   otherwise the single queue 0 is used. */
243 if (prox_port_cfg[if_port].tx_ring[0] == '\0') { // Rings-backed port can use single queue
244 targ->tx_port_queue[i].queue = prox_port_cfg[if_port].n_txq;
245 prox_port_cfg[if_port].n_txq++;
247 prox_port_cfg[if_port].n_txq = 1;
248 targ->tx_port_queue[i].queue = 0;
250 /* Set the ETH_TXQ_FLAGS_NOREFCOUNT flag if none of
251 the tasks up to the task transmitting to the port
252 uses refcnt. */
253 if (!chain_flag_state(targ, TASK_FEATURE_TXQ_FLAGS_REFCOUNT, 1)) {
254 prox_port_cfg[if_port].tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOREFCOUNT;
255 plog_info("\t\tEnabling No refcnt on port %d\n", if_port);
258 plog_info("\t\tRefcnt used on port %d\n", if_port);
261 /* By default OFFLOAD is enabled, but if the whole
262 chain has NOOFFLOADS set all the way until the
263 first task that receives from a port, it will be
264 disabled for the destination port. */
265 if (chain_flag_state(targ, TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS, 1)) {
266 prox_port_cfg[if_port].tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
267 plog_info("\t\tDisabling TX offloads on port %d\n", if_port);
269 plog_info("\t\tEnabling TX offloads on port %d\n", if_port);
272 /* By default NOMULTSEGS is disabled, as drivers/NIC might split packets on RX
273 It should only be enabled when we know for sure that the RX does not split packets.
274 Set the ETH_TXQ_FLAGS_NOMULTSEGS flag if none of the tasks up to the task
275 transmitting to the port does not use multsegs. */
276 if (!chain_flag_state(targ, TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS, 0)) {
277 prox_port_cfg[if_port].tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
278 plog_info("\t\tEnabling No MultiSegs on port %d\n", if_port);
281 plog_info("\t\tMultiSegs used on port %d\n", if_port);
/*
 * Assign an RX queue on every port the task receives from and register the
 * task's mempool for that queue.
 *
 * targ:   task whose rx_port_queue[] entries are configured.
 * socket: socket of the core running the task; only used to warn when it
 *         differs from the socket of the device.
 *
 * Panics when an RX port is not active.
 */
286 static void configure_if_rx_queues(struct task_args *targ, uint8_t socket)
288 for (int i = 0; i < targ->nb_rxports; i++) {
289 uint8_t if_port = targ->rx_port_queue[i].port;
291 if (if_port == OUT_DISCARD) {
295 PROX_PANIC(!prox_port_cfg[if_port].active, "Port %u not used, aborting...\n", if_port);
/* For a port with an rx_ring name the queue counter is reset, so such a port
   always ends up on queue 0. */
297 if(prox_port_cfg[if_port].rx_ring[0] != '\0') {
298 prox_port_cfg[if_port].n_rxq = 0;
301 targ->rx_port_queue[i].queue = prox_port_cfg[if_port].n_rxq;
302 prox_port_cfg[if_port].pool[targ->rx_port_queue[i].queue] = targ->pool;
/* One less than nb_mbuf, matching the element count the mempools in this
   file are created with. */
303 prox_port_cfg[if_port].pool_size[targ->rx_port_queue[i].queue] = targ->nb_mbuf - 1;
304 prox_port_cfg[if_port].n_rxq++;
306 int dsocket = prox_port_cfg[if_port].socket;
307 if (dsocket != -1 && dsocket != socket) {
308 plog_warn("RX core on socket %d while device on socket %d\n", socket, dsocket);
/*
 * Configure the TX and then the RX port queues of every task, passing along
 * the socket id of the core the task runs on.
 */
313 static void configure_if_queues(void)
315 struct lcore_cfg *lconf = NULL;
316 struct task_args *targ;
319 while (core_targ_next(&lconf, &targ, 0) == 0) {
320 socket = rte_lcore_to_socket_id(lconf->id);
322 configure_if_tx_queues(targ, socket);
323 configure_if_rx_queues(targ, socket);
/*
 * Build a ring name of one or two characters drawn from the ring_names
 * alphabet.
 *
 * The name is written into a static buffer, so the result (the function
 * returns const char *) is presumably a pointer to that buffer which the next
 * call overwrites - TODO confirm; the return statement and the counter that
 * feeds idx are not visible in this excerpt.
 */
327 static const char *gen_ring_name(void)
329 static char retval[] = "XX";
330 static const char* ring_names =
331 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
332 "abcdefghijklmnopqrstuvwxyz"
333 "[\\]^_`!\"#$%&'()*+,-./:;<="
/* The first character cycles through the alphabet; the second character is
   the string terminator until idx reaches the alphabet length. */
339 retval[0] = ring_names[idx % strlen(ring_names)];
340 idx /= strlen(ring_names);
341 retval[1] = idx ? ring_names[(idx - 1) % strlen(ring_names)] : 0;
/* Ring counters, split by kind; summed by ring_init_stats_total() and passed
   to init_ring_between_tasks() by init_rings(). */
348 struct ring_init_stats {
349 uint32_t n_pkt_rings;
350 uint32_t n_ctrl_rings;
351 uint32_t n_opt_rings;
/* Return the sum of the packet, control and optimized ring counters of *ris. */
354 static uint32_t ring_init_stats_total(const struct ring_init_stats *ris)
356 return ris->n_pkt_rings + ris->n_ctrl_rings + ris->n_opt_rings;
/*
 * Scan the core_task_set[] destinations of every task for entries that point
 * at task dest_task on core lcore_worker.
 *
 * The counter and the return statement are not visible in this excerpt;
 * going by the name and the uint32_t return type, the number of matching
 * entries is presumably returned.
 */
359 static uint32_t count_incoming_tasks(uint32_t lcore_worker, uint32_t dest_task)
361 struct lcore_cfg *lconf = NULL;
362 struct task_args *targ;
366 while (core_targ_next(&lconf, &targ, 0) == 0) {
367 for (uint8_t idxx = 0; idxx < MAX_PROTOCOLS; ++idxx) {
368 for (uint8_t ridx = 0; ridx < targ->core_task_set[idxx].n_elems; ++ridx) {
369 ct = targ->core_task_set[idxx].core_task[ridx];
371 if (dest_task == ct.task && lcore_worker == ct.core)
/*
 * Return the first RX ring (rx_rings[0]) of task task_id on core lcore_id.
 *
 * Three early exits guard the lookup: the core is not active, task_id is out
 * of range for the core, or the task has no RX ring yet. Their return values
 * are not visible in this excerpt (presumably NULL; the caller tests the
 * result for a ring).
 */
379 static struct rte_ring *get_existing_ring(uint32_t lcore_id, uint32_t task_id)
381 if (!prox_core_active(lcore_id, 0))
384 struct lcore_cfg *lconf = &lcore_cfg[lcore_id];
386 if (task_id >= lconf->n_tasks_all)
389 if (lconf->targs[task_id].nb_rxrings == 0)
392 return lconf->targs[task_id].rx_rings[0];
/*
 * Set up the ring that carries traffic from task starg (running on core
 * lconf) to the destination task identified by ct.
 *
 * lconf:    core configuration of the sending task.
 * starg:    sending task; the ring is appended to starg->tx_rings[].
 * ct:       destination core/task and ring type.
 * ring_idx: index of the destination within its core_task_set; stored as the
 *           worker_thread_id of the destination task.
 * idx:      index of the core_task_set the destination belongs to.
 * ris:      ring statistics (the counter updates are not visible in this
 *           excerpt).
 *
 * Two families of rings are handled: control rings (ct.type is CTRL_TYPE_MSG
 * or CTRL_TYPE_PKT), tied to the destination core's ctrl_rings_m[] /
 * ctrl_rings_p[] slots, and packet rings, which are appended to the
 * destination task's rx_rings[]. Panics when a ring cannot be created.
 *
 * Returns a ring pointer (the return statements are not visible in this
 * excerpt).
 */
395 static struct rte_ring *init_ring_between_tasks(struct lcore_cfg *lconf, struct task_args *starg,
396 const struct core_task ct, uint8_t ring_idx, int idx,
397 struct ring_init_stats *ris)
400 struct rte_ring *ring = NULL;
401 struct lcore_cfg *lworker;
402 struct task_args *dtarg;
404 PROX_ASSERT(prox_core_active(ct.core, 0));
405 lworker = &lcore_cfg[ct.core];
407 /* socket used is the one that the sending core resides on */
408 socket = rte_lcore_to_socket_id(lconf->id);
410 plog_info("\t\tCreating ring on socket %u with size %u\n"
411 "\t\t\tsource core, task and socket = %u, %u, %u\n"
412 "\t\t\tdestination core, task and socket = %u, %u, %u\n"
413 "\t\t\tdestination worker id = %u\n",
414 socket, starg->ring_size,
415 lconf->id, starg->id, socket,
416 ct.core, ct.task, rte_lcore_to_socket_id(ct.core),
/* Control ring: dring is pointed at the slot of the destination core that
   corresponds to the ring type. */
420 struct rte_ring **dring = NULL;
422 if (ct.type == CTRL_TYPE_MSG)
423 dring = &lworker->ctrl_rings_m[ct.task];
424 else if (ct.type == CTRL_TYPE_PKT) {
425 dring = &lworker->ctrl_rings_p[ct.task];
426 starg->flags |= TASK_ARG_CTRL_RINGS_P;
/* RING_F_SC_DEQ without RING_F_SP_ENQ: single consumer, multiple producers. */
430 ring = rte_ring_create(gen_ring_name(), starg->ring_size, socket, RING_F_SC_DEQ);
433 PROX_PANIC(ring == NULL, "Cannot create ring to connect I/O core %u with worker core %u\n", lconf->id, ct.core);
435 starg->tx_rings[starg->tot_n_txrings_inited] = ring;
436 starg->tot_n_txrings_inited++;
/* Rings sent by the master core are recorded in the global ctrl_rings[]
   table; rings towards the master become the sender's ctrl_plane_ring. */
438 if (lconf->id == prox_cfg.master) {
439 ctrl_rings[ct.core*MAX_TASKS_PER_CORE + ct.task] = ring;
440 } else if (ct.core == prox_cfg.master) {
441 starg->ctrl_plane_ring = ring;
444 plog_info("\t\tCore %u task %u to -> core %u task %u ctrl_ring %s %p %s\n",
445 lconf->id, starg->id, ct.core, ct.task, ct.type == CTRL_TYPE_PKT?
446 "pkt" : "msg", ring, ring->name);
/* NOTE(review): targs[ct.task] is indexed here before the assertion on
   ct.task < n_tasks_all a few lines further down. */
451 dtarg = &lworker->targs[ct.task];
452 lworker->targs[ct.task].worker_thread_id = ring_idx;
453 PROX_ASSERT(dtarg->flags & TASK_ARG_RX_RING);
454 PROX_ASSERT(ct.task < lworker->n_tasks_all);
456 /* If all the following conditions are met, the ring can be
458 if (!task_is_master(starg) && !task_is_master(dtarg) && starg->lconf->id == dtarg->lconf->id &&
459 starg->nb_txrings == 1 && idx == 0 && dtarg->task &&
460 dtarg->tot_rxrings == 1 && starg->task == dtarg->task - 1) {
461 plog_info("\t\tOptimizing away ring on core %u from task %u to task %u\n",
462 dtarg->lconf->id, starg->task, dtarg->task);
463 /* No need to set up ws_mbuf. */
464 starg->tx_opt_ring = 1;
465 /* During init of destination task, the buffer in the
466 source task will be initialized. */
467 dtarg->tx_opt_ring_task = starg;
473 int ring_created = 1;
474 /* Only create multi-producer rings if configured to do so AND
475 there is more than one task sending to the task (or bypass is enabled) */
476 if ((prox_cfg.flags & DSF_MP_RINGS && count_incoming_tasks(ct.core, ct.task) > 1)
477 || (prox_cfg.flags & DSF_ENABLE_BYPASS)) {
478 ring = get_existing_ring(ct.core, ct.task);
/* NOTE(review): the two log messages below look swapped - "creatign" (sic)
   follows the lookup of an existing ring, while "using" follows
   rte_ring_create(). Confirm and correct the strings. */
481 plog_info("\t\tCore %u task %u creatign MP ring %p to core %u task %u\n",
482 lconf->id, starg->id, ring, ct.core, ct.task);
486 ring = rte_ring_create(gen_ring_name(), starg->ring_size, socket, RING_F_SC_DEQ);
487 plog_info("\t\tCore %u task %u using MP ring %p from core %u task %u\n",
488 lconf->id, starg->id, ring, ct.core, ct.task);
/* Default case: single-producer, single-consumer ring. */
492 ring = rte_ring_create(gen_ring_name(), starg->ring_size, socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
494 PROX_PANIC(ring == NULL, "Cannot create ring to connect I/O core %u with worker core %u\n", lconf->id, ct.core);
496 starg->tx_rings[starg->tot_n_txrings_inited] = ring;
497 starg->tot_n_txrings_inited++;
500 PROX_ASSERT(dtarg->nb_rxrings < MAX_RINGS_PER_TASK);
501 dtarg->rx_rings[dtarg->nb_rxrings] = ring;
/* The destination remembers how many tasks share the set and which
   core/task feeds it. */
504 dtarg->nb_slave_threads = starg->core_task_set[idx].n_elems;
505 dtarg->lb_friend_core = lconf->id;
506 dtarg->lb_friend_task = starg->id;
507 plog_info("\t\tWorker thread %d has core %d, task %d as a lb friend\n", ct.core, lconf->id, starg->id);
508 plog_info("\t\tCore %u task %u tx_ring[%u] -> core %u task %u rx_ring[%u] %p %s %u WT\n",
509 lconf->id, starg->id, ring_idx, ct.core, ct.task, dtarg->nb_rxrings, ring, ring->name,
510 dtarg->nb_slave_threads);
/*
 * Create all rings between tasks.
 *
 * First pass: for every task and every destination listed in its
 * core_task_set[], call init_ring_between_tasks(), then log a summary taken
 * from the ring statistics.
 * Second pass: for every task flagged TASK_ARG_L3, set up a CTRL_TYPE_PKT
 * ring towards the master core and one from the master core's task array
 * back to the task.
 */
515 static void init_rings(void)
517 struct lcore_cfg *lconf = NULL;
518 struct task_args *starg;
519 struct ring_init_stats ris = {0};
521 while (core_targ_next(&lconf, &starg, 1) == 0) {
522 plog_info("\t*** Initializing rings on core %u, task %u ***\n", lconf->id, starg->id);
523 for (uint8_t idx = 0; idx < MAX_PROTOCOLS; ++idx) {
524 for (uint8_t ring_idx = 0; ring_idx < starg->core_task_set[idx].n_elems; ++ring_idx) {
525 PROX_ASSERT(ring_idx < MAX_WT_PER_LB);
526 PROX_ASSERT(starg->tot_n_txrings_inited < MAX_RINGS_PER_TASK);
528 struct core_task ct = starg->core_task_set[idx].core_task[ring_idx];
529 init_ring_between_tasks(lconf, starg, ct, ring_idx, idx, &ris);
534 plog_info("\tInitialized %d rings:\n"
535 "\t\tNumber of packet rings: %u\n"
536 "\t\tNumber of control rings: %u\n"
537 "\t\tNumber of optimized rings: %u\n",
538 ring_init_stats_total(&ris),
/* `port`, `rx_ring` and `tx_ring` are not used in the lines visible here. */
544 struct prox_port_cfg *port;
545 while (core_targ_next(&lconf, &starg, 1) == 0) {
546 if ((starg->task_init) && (starg->flags & TASK_ARG_L3)) {
548 ct.core = prox_cfg.master;
550 ct.type = CTRL_TYPE_PKT;
551 struct rte_ring *rx_ring = init_ring_between_tasks(lconf, starg, ct, 0, 0, &ris);
/* NOTE(review): the call below passes lcore_cfg (the start of the core
   array) as the sending core, while the sending task comes from
   lcore_cfg[prox_cfg.master] - confirm this is intended. The doubled
   semicolon is harmless. */
554 ct.task = starg->id;;
555 struct rte_ring *tx_ring = init_ring_between_tasks(lcore_cfg, lcore_cfg[prox_cfg.master].targs, ct, 0, 0, &ris);
/*
 * Randomise the order of the objects held by a mempool.
 *
 * All objects (up to nb_mbuf of them) are drained into a scratch array and
 * then returned to the pool one by one, each time picking a random slot that
 * still holds an object.
 *
 * mempool: pool to shuffle; it is emptied and refilled by this call.
 * nb_mbuf: capacity of the scratch array, i.e. the maximum number of objects
 *          that are drained and shuffled.
 *
 * Panics when the scratch array cannot be allocated.
 */
static void shuffle_mempool(struct rte_mempool* mempool, uint32_t nb_mbuf)
{
	struct rte_mbuf **pkts;
	uint32_t got = 0;

	if (nb_mbuf == 0)
		return;

	pkts = prox_zmalloc(nb_mbuf * sizeof(*pkts), rte_socket_id());
	PROX_PANIC(pkts == NULL, "Failed to allocate memory to shuffle mempool of %u mbufs\n", nb_mbuf);

	/* Never drain more objects than the scratch array can hold. */
	while (got < nb_mbuf && rte_mempool_get_bulk(mempool, (void **)(pkts + got), 1) == 0)
		++got;

	while (got) {
		uint32_t idx;

		/* The index has to stay within [0, nb_mbuf). The former
		   expression "rand() % nb_mbuf - 1" parsed as
		   "(rand() % nb_mbuf) - 1" and could yield -1, reading (and
		   possibly returning to the pool) the word in front of the
		   array. Slots that were already handed back are NULL and
		   are simply retried. */
		do {
			idx = (uint32_t)rand() % nb_mbuf;
		} while (pkts[idx] == NULL);

		rte_mempool_put_bulk(mempool, (void **)&pkts[idx], 1);
		pkts[idx] = NULL;
		--got;
	}
	prox_free(pkts);
}
/*
 * Create one mempool per socket and share it between all tasks on that
 * socket that receive from a port.
 *
 * Pass 1 accumulates, per socket, the total mbuf count and checks that all
 * tasks agree on cache size and mbuf size (panic otherwise).
 * Pass 2 creates a pool named "socket_<n>_pool" for each socket with a
 * non-zero mbuf count, optionally shuffling it (DSF_SHUFFLE).
 * Pass 3 points every receiving task at the pool of its socket.
 */
581 static void setup_mempools_unique_per_socket(void)
585 struct lcore_cfg *lconf = NULL;
586 struct task_args *targ;
588 struct rte_mempool *pool[MAX_SOCKETS];
589 uint32_t mbuf_count[MAX_SOCKETS] = {0};
590 uint32_t nb_cache_mbuf[MAX_SOCKETS] = {0};
591 uint32_t mbuf_size[MAX_SOCKETS] = {0};
/* Pass 1: gather the per-socket requirements. */
593 while (core_targ_next_early(&lconf, &targ, 0) == 0) {
594 PROX_PANIC(targ->task_init == NULL, "task_init = NULL, is mode specified for core %d, task %d ?\n", lconf->id, targ->id);
595 uint8_t socket = rte_lcore_to_socket_id(lconf->id);
596 PROX_ASSERT(socket < MAX_SOCKETS);
598 if (targ->mbuf_size_set_explicitely)
599 flags = MEMPOOL_F_NO_SPREAD;
/* An mbuf size defined by the task mode is used unless the configuration
   set one explicitly. */
600 if ((!targ->mbuf_size_set_explicitely) && (targ->task_init->mbuf_size != 0)) {
601 targ->mbuf_size = targ->task_init->mbuf_size;
603 if (targ->rx_port_queue[0].port != OUT_DISCARD) {
604 struct prox_port_cfg* port_cfg = &prox_port_cfg[targ->rx_port_queue[0].port];
605 PROX_ASSERT(targ->nb_mbuf != 0);
606 mbuf_count[socket] += targ->nb_mbuf;
607 if (nb_cache_mbuf[socket] == 0)
608 nb_cache_mbuf[socket] = targ->nb_cache_mbuf;
610 PROX_PANIC(nb_cache_mbuf[socket] != targ->nb_cache_mbuf,
611 "all mbuf_cache must have the same size if using a unique mempool per socket\n");
613 if (mbuf_size[socket] == 0)
614 mbuf_size[socket] = targ->mbuf_size;
616 PROX_PANIC(mbuf_size[socket] != targ->mbuf_size,
617 "all mbuf_size must have the same size if using a unique mempool per socket\n");
/* For ports whose short name is "vmxnet3" the size is raised to at least
   MBUF_SIZE + RTE_PKTMBUF_HEADROOM. */
619 if ((!targ->mbuf_size_set_explicitely) && (strcmp(port_cfg->short_name, "vmxnet3") == 0)) {
620 if (mbuf_size[socket] < MBUF_SIZE + RTE_PKTMBUF_HEADROOM)
621 mbuf_size[socket] = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
/* Pass 2: create the pools; each holds mbuf_count - 1 elements. */
625 for (int i = 0 ; i < MAX_SOCKETS; i++) {
626 if (mbuf_count[i] != 0) {
627 sprintf(name, "socket_%u_pool", i);
628 pool[i] = rte_mempool_create(name,
629 mbuf_count[i] - 1, mbuf_size[i],
631 sizeof(struct rte_pktmbuf_pool_private),
632 rte_pktmbuf_pool_init, NULL,
633 prox_pktmbuf_init, NULL,
635 PROX_PANIC(pool[i] == NULL, "\t\tError: cannot create mempool for socket %u\n", i);
636 plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", pool[i],
637 mbuf_count[i], mbuf_size[i], nb_cache_mbuf[i], i);
639 if (prox_cfg.flags & DSF_SHUFFLE) {
640 shuffle_mempool(pool[i], mbuf_count[i]);
/* Pass 3: hand the shared pool to every receiving task. */
646 while (core_targ_next_early(&lconf, &targ, 0) == 0) {
647 uint8_t socket = rte_lcore_to_socket_id(lconf->id);
649 if (targ->rx_port_queue[0].port != OUT_DISCARD) {
650 /* use this pool for the interface that the core is receiving from */
651 /* If one core receives from multiple ports, all the ports use the same mempool */
652 targ->pool = pool[socket];
653 /* Set the number of mbuf to the number of the unique mempool, so that the used and free work */
654 targ->nb_mbuf = mbuf_count[socket];
655 plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", targ->pool,
656 targ->nb_mbuf, mbuf_size[socket], targ->nb_cache_mbuf, socket);
/*
 * Give a receiving task its own mempool.
 *
 * lconf: core the task runs on; its id picks the socket and is part of the
 *        default pool name.
 * targ:  task to set up; targ->mbuf_size, targ->nb_mbuf and targ->pool may
 *        be updated.
 *
 * When targ->pool_name is empty a pool name is generated. Otherwise a
 * memzone called "MP_<pool_name>" is looked up and the mempool found there is
 * used. If targ->pool is still NULL afterwards, a new pool is created. The
 * pool is shuffled when DSF_SHUFFLE is set. Panics when no pool could be
 * obtained.
 */
661 static void setup_mempool_for_rx_task(struct lcore_cfg *lconf, struct task_args *targ)
663 const uint8_t socket = rte_lcore_to_socket_id(lconf->id);
664 struct prox_port_cfg *port_cfg = &prox_port_cfg[targ->rx_port_queue[0].port];
665 const struct rte_memzone *mz;
666 struct rte_mempool *mp = NULL;
668 char memzone_name[64];
671 /* mbuf size can be set
672 * - from config file (highest priority, overwriting any other config) - should only be used as workaround
673 * - through each 'mode', overwriting the default mbuf_size
674 * - defaulted to MBUF_SIZE i.e. 1518 Bytes
675 * Except is set expliciteky, ensure that size is big enough for vmxnet3 driver
677 if (targ->mbuf_size_set_explicitely) {
678 flags = MEMPOOL_F_NO_SPREAD;
679 /* targ->mbuf_size already set */
681 else if (targ->task_init->mbuf_size != 0) {
682 /* mbuf_size not set through config file but set through mode */
683 targ->mbuf_size = targ->task_init->mbuf_size;
685 else if (strcmp(port_cfg->short_name, "vmxnet3") == 0) {
686 if (targ->mbuf_size < MBUF_SIZE + RTE_PKTMBUF_HEADROOM)
687 targ->mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
690 /* allocate memory pool for packets */
691 PROX_ASSERT(targ->nb_mbuf != 0);
/* Note: the value printed after "port_" in the generated name is the task
   id (targ->id). */
693 if (targ->pool_name[0] == '\0') {
694 sprintf(name, "core_%u_port_%u_pool", lconf->id, targ->id);
697 snprintf(memzone_name, sizeof(memzone_name)-1, "MP_%s", targ->pool_name);
698 mz = rte_memzone_lookup(memzone_name);
/* The memzone address is taken to be the mempool itself, and the mbuf count
   of the task is replaced by the size of that pool. */
701 mp = (struct rte_mempool*)mz->addr;
703 targ->nb_mbuf = mp->size;
707 #ifdef RTE_LIBRTE_IVSHMEM_FALSE
708 if (mz != NULL && mp != NULL && mp->phys_addr != mz->ioremap_addr) {
709 /* Init mbufs with ioremap_addr for dma */
710 mp->phys_addr = mz->ioremap_addr;
711 mp->elt_pa[0] = mp->phys_addr + (mp->elt_va_start - (uintptr_t)mp);
713 struct prox_pktmbuf_reinit_args init_args;
715 init_args.lconf = lconf;
717 uint32_t elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
718 rte_mempool_obj_iter((void*)mp->elt_va_start, mp->size, elt_sz, 1,
719 mp->elt_pa, mp->pg_num, mp->pg_shift, prox_pktmbuf_reinit, &init_args);
723 /* Use this pool for the interface that the core is
724 receiving from if one core receives from multiple
725 ports, all the ports use the same mempool */
726 if (targ->pool == NULL) {
727 plog_info("\t\tCreating mempool with name '%s'\n", name);
/* The pool is created with nb_mbuf - 1 elements. */
728 targ->pool = rte_mempool_create(name,
729 targ->nb_mbuf - 1, targ->mbuf_size,
731 sizeof(struct rte_pktmbuf_pool_private),
732 rte_pktmbuf_pool_init, NULL,
733 prox_pktmbuf_init, lconf,
737 PROX_PANIC(targ->pool == NULL,
738 "\t\tError: cannot create mempool for core %u port %u: %s\n", lconf->id, targ->id, rte_strerror(rte_errno));
740 plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", targ->pool,
741 targ->nb_mbuf, targ->mbuf_size, targ->nb_cache_mbuf, socket);
742 if (prox_cfg.flags & DSF_SHUFFLE) {
743 shuffle_mempool(targ->pool, targ->nb_mbuf);
/*
 * Set up a dedicated mempool, through setup_mempool_for_rx_task(), for every
 * task visited. Tasks whose first RX port entry is OUT_DISCARD are tested
 * separately before that call (the statement taken for them is not visible
 * in this excerpt). Panics when a task has no task_init, i.e. no mode.
 */
747 static void setup_mempools_multiple_per_socket(void)
749 struct lcore_cfg *lconf = NULL;
750 struct task_args *targ;
752 while (core_targ_next_early(&lconf, &targ, 0) == 0) {
753 PROX_PANIC(targ->task_init == NULL, "task_init = NULL, is mode specified for core %d, task %d ?\n", lconf->id, targ->id);
754 if (targ->rx_port_queue[0].port == OUT_DISCARD)
756 setup_mempool_for_rx_task(lconf, targ);
/*
 * Choose the mempool layout: one shared pool per socket when
 * UNIQUE_MEMPOOL_PER_SOCKET is set in prox_cfg.flags; the per-task variant
 * is the other call visible here.
 */
760 static void setup_mempools(void)
762 if (prox_cfg.flags & UNIQUE_MEMPOOL_PER_SOCKET)
763 setup_mempools_unique_per_socket();
765 setup_mempools_multiple_per_socket();
/*
 * Store, in every task argument structure, a back pointer to the core
 * configuration that owns it.
 */
768 static void set_task_lconf(void)
770 struct lcore_cfg *lconf;
771 uint32_t lcore_id = -1;
773 while(prox_core_next(&lcore_id, 1) == 0) {
774 lconf = &lcore_cfg[lcore_id];
775 for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
776 lconf->targs[task_id].lconf = lconf;
/*
 * Build the reverse links of the task graph: every task is appended to the
 * prev_tasks[] list of each destination found in its core_task_set[].
 * chain_flag_state() later walks these lists.
 *
 * NOTE(review): no bound check on n_prev_tasks and no NULL check on the
 * result of core_targ_get() is visible in this excerpt.
 */
781 static void set_dest_threads(void)
783 struct lcore_cfg *lconf = NULL;
784 struct task_args *targ;
786 while (core_targ_next(&lconf, &targ, 0) == 0) {
787 for (uint8_t idx = 0; idx < MAX_PROTOCOLS; ++idx) {
788 for (uint8_t ring_idx = 0; ring_idx < targ->core_task_set[idx].n_elems; ++ring_idx) {
789 struct core_task ct = targ->core_task_set[idx].core_task[ring_idx];
791 struct task_args *dest_task = core_targ_get(ct.core, ct.task);
792 dest_task->prev_tasks[dest_task->n_prev_tasks++] = targ;
/*
 * Call the optional early_init hook of every task's mode, passing the task
 * arguments. Modes without such a hook are skipped.
 */
798 static void setup_all_task_structs_early_init(void)
800 struct lcore_cfg *lconf = NULL;
801 struct task_args *targ;
803 plog_info("\t*** Calling early init on all tasks ***\n");
804 while (core_targ_next(&lconf, &targ, 0) == 0) {
805 if (targ->task_init->early_init) {
806 targ->task_init->early_init(targ);
/*
 * Create the runtime structure of every task with init_task_struct().
 *
 * The master task is initialized in a first pass so that its task_base
 * pointer can be stored in targs[].tmaster of every other task before those
 * are initialized in a second pass. Panics when no master task was found.
 */
811 static void setup_all_task_structs(void)
813 struct lcore_cfg *lconf;
814 uint32_t lcore_id = -1;
815 struct task_base *tmaster = NULL;
/* Pass 1: master task(s) only. */
817 while(prox_core_next(&lcore_id, 1) == 0) {
818 lconf = &lcore_cfg[lcore_id];
819 for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
820 if (task_is_master(&lconf->targs[task_id])) {
821 plog_info("\tInitializing MASTER struct for core %d task %d\n", lcore_id, task_id);
822 lconf->tasks_all[task_id] = init_task_struct(&lconf->targs[task_id]);
823 tmaster = lconf->tasks_all[task_id];
827 PROX_PANIC(tmaster == NULL, "Can't initialize master task\n");
/* Pass 2: all remaining tasks, each given the master task pointer first. */
830 while(prox_core_next(&lcore_id, 1) == 0) {
831 lconf = &lcore_cfg[lcore_id];
832 plog_info("\tInitializing struct for core %d with %d task\n", lcore_id, lconf->n_tasks_all);
833 for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
834 if (!task_is_master(&lconf->targs[task_id])) {
835 plog_info("\tInitializing struct for core %d task %d\n", lcore_id, task_id);
836 lconf->targs[task_id].tmaster = tmaster;
837 lconf->tasks_all[task_id] = init_task_struct(&lconf->targs[task_id]);
/*
 * Mark as active every port that some task receives from or transmits to.
 *
 * NOTE(review): the port id is used directly as an index into
 * prox_port_cfg[]; no OUT_DISCARD check is visible in this excerpt.
 */
843 static void init_port_activate(void)
845 struct lcore_cfg *lconf = NULL;
846 struct task_args *targ;
849 while (core_targ_next_early(&lconf, &targ, 0) == 0) {
850 for (int i = 0; i < targ->nb_rxports; i++) {
851 port_id = targ->rx_port_queue[i].port;
852 prox_port_cfg[port_id].active = 1;
855 for (int i = 0; i < targ->nb_txports; i++) {
856 port_id = targ->tx_port_queue[i].port;
857 prox_port_cfg[port_id].active = 1;
862 /* Initialize cores and allocate mempools */
/*
 * The steps run in a fixed order, each announced by a log line; several of
 * the calls themselves are not visible in this excerpt. Panics when a core
 * sits on a socket whose id is not below MAX_SOCKETS.
 */
863 static void init_lcores(void)
865 struct lcore_cfg *lconf = 0;
866 uint32_t lcore_id = -1;
868 while(prox_core_next(&lcore_id, 0) == 0) {
869 uint8_t socket = rte_lcore_to_socket_id(lcore_id);
870 PROX_PANIC(socket + 1 > MAX_SOCKETS, "Can't configure core %u (on socket %u). MAX_SOCKET is set to %d\n", lcore_id, socket, MAX_SOCKETS);
873 /* need to allocate mempools as the first thing to use the lowest possible address range */
874 plog_info("=== Initializing mempools ===\n");
877 lcore_cfg_alloc_hp();
882 plog_info("=== Initializing port addresses ===\n");
885 plog_info("=== Initializing queue numbers on cores ===\n");
886 configure_if_queues();
888 plog_info("=== Initializing rings on cores ===\n");
891 plog_info("=== Checking configuration consistency ===\n");
892 check_cfg_consistent();
896 setup_all_task_structs_early_init();
897 plog_info("=== Initializing tasks ===\n");
898 setup_all_task_structs();
/*
 * Bring PROX from parsed command-line options to a state where run() can be
 * called: read the configuration file, set up DPDK, activate and initialize
 * the devices, and allocate the optional log buffer.
 *
 * argc: not used in the lines visible here.
 * argv: argv[0] is handed to prox_setup_rte().
 *
 * Returns an int that callers compare against 0 for success; the return
 * statements are not visible in this excerpt. main() treats any non-zero
 * value as a failure.
 */
901 static int setup_prox(int argc, char **argv)
903 if (prox_read_config_file() != 0 ||
904 prox_setup_rte(argv[0]) != 0) {
908 if (prox_cfg.flags & DSF_CHECK_SYNTAX) {
909 plog_info("=== Configuration file syntax has been checked ===\n\n");
913 init_port_activate();
914 plog_info("=== Initializing rte devices ===\n");
915 if (!(prox_cfg.flags & DSF_USE_DUMMY_DEVICES))
917 init_rte_dev(prox_cfg.flags & DSF_USE_DUMMY_DEVICES);
918 plog_info("=== Calibrating TSC overhead ===\n");
920 plog_info("\tTSC running at %"PRIu64" Hz\n", rte_get_tsc_hz());
923 plog_info("=== Initializing ports ===\n");
926 if (prox_cfg.logbuf_size) {
927 prox_cfg.logbuf = prox_zmalloc(prox_cfg.logbuf_size, rte_socket_id());
928 PROX_PANIC(prox_cfg.logbuf == NULL, "Failed to allocate memory for logbuf with size = %d\n", prox_cfg.logbuf_size);
931 if (prox_cfg.flags & DSF_CHECK_INIT) {
932 plog_info("=== Initialization sequence completed ===\n\n");
936 /* Current way that works to disable DPDK logging */
/* NOTE(review): the stream is opened read-only ("r") although it is handed
   over as a log stream, and the fopen() result is not checked for NULL. */
937 FILE *f = fopen("/dev/null", "r");
938 rte_openlog_stream(f);
939 plog_info("=== PROX started ===\n");
/* Read by main() in daemon mode to choose the exit status of the parent
   process. */
943 static int success = 0;
/*
 * Handler installed for SIGUSR1 and SIGUSR2 in daemon mode. It tests for
 * SIGUSR1; the statement executed in that case is not visible in this
 * excerpt (presumably it sets `success`).
 * Note: the parameter name shadows the signal() function inside the handler.
 */
944 static void siguser_handler(int signal)
946 if (signal == SIGUSR1)
/*
 * Handler for the "drastic" termination path (installed for SIGABRT and
 * SIGPIPE, and for a second SIGINT by sigterm_handler()).
 *
 * It restores the default disposition of SIGABRT and SIGPIPE, ignores any
 * further SIGINT, cancels and joins every core thread other than the calling
 * one, and closes the ports.
 *
 * NOTE(review): this calls logging and pthread functions from signal
 * context; confirm that is acceptable for the functions involved.
 */
952 static void sigabrt_handler(__attribute__((unused)) int signum)
954 /* restore default disposition for SIGABRT and SIGPIPE */
955 signal(SIGABRT, SIG_DFL);
956 signal(SIGPIPE, SIG_DFL);
958 /* ignore further Ctrl-C */
959 signal(SIGINT, SIG_IGN);
961 /* more drastic exit on tedious termination signal */
962 plog_info("Aborting...\n");
963 if (lcore_cfg != NULL) {
/* tid0 is an all-zero pthread_t used to recognise cores whose thread_id was
   never filled in; tid is the calling thread. */
965 pthread_t thread_id, tid0, tid = pthread_self();
966 memset(&tid0, 0, sizeof(tid0));
968 /* cancel all threads except current one */
970 while (prox_core_next(&lcore_id, 1) == 0) {
971 thread_id = lcore_cfg[lcore_id].thread_id;
972 if (pthread_equal(thread_id, tid0))
974 if (pthread_equal(thread_id, tid))
976 pthread_cancel(thread_id);
979 /* wait for cancelled threads to terminate */
981 while (prox_core_next(&lcore_id, 1) == 0) {
982 thread_id = lcore_cfg[lcore_id].thread_id;
983 if (pthread_equal(thread_id, tid0))
985 if (pthread_equal(thread_id, tid))
987 pthread_join(thread_id, NULL);
994 /* close ports on termination signal */
995 close_ports_atexit();
/*
 * Handler for the "harmless" termination signals registered in main()
 * (SIGHUP, SIGINT, SIGQUIT, SIGTERM, SIGUSR1, SIGUSR2).
 *
 * On SIGINT the handler for a subsequent SIGINT is switched to
 * sigabrt_handler(), so a second Ctrl-C aborts. The statement that performs
 * the graceful quit is not visible in this excerpt.
 */
1001 static void sigterm_handler(int signum)
1003 /* abort on second Ctrl-C */
1004 if (signum == SIGINT)
1005 signal(SIGINT, sigabrt_handler);
1007 /* gracefully quit on harmless termination signal */
1008 /* ports will subsequently get closed at resulting exit */
/*
 * Program entry point.
 *
 * Parses the command line (printing the usage text on failure), initializes
 * logging, installs the exit and signal handlers and then either
 *  - runs in daemon mode (DSF_DAEMON): a child is forked to set up and run
 *    PROX and signals the parent with SIGUSR1 on success or SIGUSR2 on
 *    failure, while the parent reports `success` as its exit status, or
 *  - sets up and runs PROX directly.
 *
 * Returns EXIT_SUCCESS or EXIT_FAILURE.
 */
1012 int main(int argc, char **argv)
1014 /* set en_US locale to print big numbers with ',' */
1015 setlocale(LC_NUMERIC, "en_US.utf-8");
1017 if (prox_parse_args(argc, argv) != 0){
1018 prox_usage(argv[0]);
1021 plog_init(prox_cfg.log_name, prox_cfg.log_name_pid);
1022 plog_info("=== " PROGRAM_NAME " " VERSION_STR " ===\n");
/* sizeof counts the terminating NUL of the literal, so the pointer skips the
   prefix plus the one character that follows it. */
1023 plog_info("\tUsing DPDK %s\n", rte_version() + sizeof(RTE_VER_PREFIX));
1026 if (prox_cfg.flags & DSF_LIST_TASK_MODES) {
1027 /* list supported task modes and exit */
1029 return EXIT_SUCCESS;
1032 /* close ports at normal exit */
1033 atexit(close_ports_atexit);
1034 /* gracefully quit on harmless termination signals */
1035 signal(SIGHUP, sigterm_handler);
1036 signal(SIGINT, sigterm_handler);
1037 signal(SIGQUIT, sigterm_handler);
1038 signal(SIGTERM, sigterm_handler);
1039 signal(SIGUSR1, sigterm_handler);
1040 signal(SIGUSR2, sigterm_handler);
1041 /* more drastic exit on tedious termination signals */
1042 signal(SIGABRT, sigabrt_handler);
1043 signal(SIGPIPE, sigabrt_handler);
1045 if (prox_cfg.flags & DSF_DAEMON) {
/* In daemon mode SIGUSR1/SIGUSR2 are re-registered: they now carry the
   setup result from the child to the parent. */
1046 signal(SIGUSR1, siguser_handler);
1047 signal(SIGUSR2, siguser_handler);
1048 plog_info("=== Running in Daemon mode ===\n");
1049 plog_info("\tForking child and waiting for setup completion\n");
/* Taken before the fork, so the child knows which process to signal. */
1051 pid_t ppid = getpid();
1054 plog_err("Failed to fork process to run in daemon mode\n");
1055 return EXIT_FAILURE;
1063 kill(ppid, SIGUSR2);
1064 return EXIT_FAILURE;
1066 if (setup_prox(argc, argv) != 0) {
1067 kill(ppid, SIGUSR2);
1068 return EXIT_FAILURE;
1071 kill(ppid, SIGUSR1);
1072 run(prox_cfg.flags);
1073 return EXIT_SUCCESS;
1077 /* Before exiting the parent, wait until the
1078 child process has finished setting up */
1080 if (prox_cfg.logbuf) {
1081 file_print(prox_cfg.logbuf);
1083 return success? EXIT_SUCCESS : EXIT_FAILURE;
/* Non-daemon mode: set up and run in this process. */
1087 if (setup_prox(argc, argv) != 0)
1088 return EXIT_FAILURE;
1089 run(prox_cfg.flags);
1090 return EXIT_SUCCESS;