// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_byteorder.h>
#include <rte_version.h>
#include <rte_hash_crc.h>

#include "prox_malloc.h"
#include "task_base.h"
#include "rx_pkt.h"
#include "etypes.h"
#include "log.h"
#include "quit.h"
#include "qinq.h"
#include "lconf.h"
#include "prefetch.h"
#include "defines.h"
#include "toeplitz.h"
#include "hash_utils.h"
#include "handle_lb_net.h"
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
#define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
#endif
/* Load balancing based on one byte: figure out what type of packet
   is passed and, depending on the type, pass the packet to the correct
   worker thread. If an unsupported packet type is received, the packet
   is simply dropped. This load balancer can only handle QinQ packets
   (i.e. packets coming from the vCPE). */
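/* Each worker owns one output per protocol, so the handlers below
   return an out[] index of the form worker + proto * nb_worker_threads;
   OUT_DISCARD drops the packet. */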
int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
struct task_lb_qinq {
	struct task_base base;
	uint8_t *worker_table;
	uint8_t bit_mask;
	uint8_t protocols_mask;
	uint8_t nb_worker_threads;
	uint16_t qinq_tag;
};
static void init_task_lb_qinq(struct task_base *tbase, struct task_args *targ)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);

	task->qinq_tag = targ->qinq_tag;
	task->nb_worker_threads = targ->nb_worker_threads;
	task->bit_mask = rte_is_power_of_2(targ->nb_worker_threads) ? targ->nb_worker_threads - 1 : 0xff;
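	/* With a power-of-two worker count the per-packet hash paths can
	   replace the modulo with a mask: e.g. 8 workers gives bit_mask = 7
	   and worker = hash & 7. Otherwise bit_mask stays 0xff and
	   get_worker() below falls back to hash % nb_worker_threads. */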
	/* The load distributor is sending to a set of cores. These
	   cores are responsible for handling a set of flows
	   identified by a QinQ tag. The load distributor identifies
	   the flows and forwards them to the appropriate worker. The
	   mapping from flow to worker is stored within the
	   worker_table. Build the worker_table by asking each worker
	   which flows it handles. */
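	/* The table is indexed by a 24-bit value combining the two 12-bit
	   VLAN IDs, which matches the 0x1000000-byte allocation below. As
	   an illustration only (the actual PKT_TO_LUTQINQ macro is defined
	   in a PROX header and works on network-byte-order TCI fields),
	   such an index could be built as:

	       ((uint32_t)(svlan & 0xFFF) << 12) | (cvlan & 0xFFF)
	*/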
	task->worker_table = prox_zmalloc(0x1000000, socket_id);
	for (int i = 0; i < targ->nb_worker_threads; ++i) {
		struct core_task ct = targ->core_task_set[0].core_task[i];
		struct task_args *t = core_targ_get(ct.core, ct.task);

		PROX_PANIC(t->task_init->flow_iter.beg == NULL,
			   "Load distributor can't find flows owned by destination worker %d\n", i);
		struct flow_iter *it = &t->task_init->flow_iter;

		for (it->beg(it, t); !it->is_end(it, t); it->next(it, t)) {
			uint16_t svlan = it->get_svlan(it, t);
			uint16_t cvlan = it->get_cvlan(it, t);

			task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)] = i;
		}
	}
	/* Check which protocols we are allowed to send to worker tasks */
	for (int i = 0; i < MAX_PROTOCOLS; ++i) {
		int is_active = !!targ->core_task_set[i].n_elems;
		task->protocols_mask |= is_active << i;
	}
	plog_info("\t\ttask_lb_qinq protocols_mask = 0x%x\n", task->protocols_mask);
	if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_RSS)
		tbase->flags |= BASE_FLAG_LUT_QINQ_RSS;
	if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_HASH)
		tbase->flags |= BASE_FLAG_LUT_QINQ_HASH;
	plog_info("\t\ttask_lb_qinq flags = 0x%x\n", tbase->flags);
}
static struct task_init task_init_lb_qinq = {
	.mode_str = "lbqinq",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.size = sizeof(struct task_lb_qinq)
};
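/*
	Usage sketch (illustrative, not taken from this file): a PROX
	configuration binds a core to this task through the mode string,
	along these lines:

		[core 1]
		name=lb
		task=0
		mode=lbqinq
		rx port=inet0
		tx cores=2-5 task=0

	The exact keys and values depend on the deployment; see the PROX
	documentation and the example configs shipped with it.
*/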
/*
	Add the correct port id to mbufs coming from a DPDK ring port in the load balancer.
	For the split-bng using DPDK rings between the vSwitch and the VMs
	we need to know the port from which a packet was received.
	The ring PMD in DPDK does not update the port field in the mbuf
	and thus we have no control over the port numbers that are being used.
	This submode allows the load balancer to set the port number on which it
	received the packet.
*/
static struct task_init task_init_lb_qinq_set_port = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_set_port",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk_set_port,
	.size = sizeof(struct task_lb_qinq)
};
/*
	Load balance on a hash of the combined cvlan and svlan.
*/
static struct task_init task_init_lb_qinq_hash_friend = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_hash_friend",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.flag_features = TASK_FEATURE_LUT_QINQ_HASH,
	.size = sizeof(struct task_lb_qinq)
};
/*
	Load balance on an RSS hash of the combined cvlan and svlan.
	This could be used to compare with HW implementations.
*/
static struct task_init task_init_lb_qinq_rss_friend = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_rss_friend",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.flag_features = TASK_FEATURE_LUT_QINQ_RSS,
	.size = sizeof(struct task_lb_qinq)
};
__attribute__((constructor)) static void reg_task_lb_qinq(void)
{
	reg_task(&task_init_lb_qinq);
	reg_task(&task_init_lb_qinq_hash_friend);
	reg_task(&task_init_lb_qinq_rss_friend);
	reg_task(&task_init_lb_qinq_set_port);
}
static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf);
int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t j;

	prefetch_first(mbufs, n_pkts);

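	/* Software pipeline: prefetch the mbuf struct and packet data a few
	   packets ahead, then classify the current packet while later ones
	   are still being pulled into cache. The tail loop handles the last
	   PREFETCH_OFFSET packets, for which nothing is left to prefetch. */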
	for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
		PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
		PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#ifdef PROX_PREFETCH_OFFSET
	PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
	for (; j < n_pkts; ++j) {
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#endif

	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}
int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t j;
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
	uint32_t port_id = mbufs[0]->pkt.in_port;
#else
	uint32_t port_id = mbufs[0]->port;
#endif

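	/* Recover the port this burst was actually read from. For multi-port
	   receive, last_read_portid has already been advanced past the port
	   that produced this burst, so step back one position modulo the
	   number of RX ports before looking up the port id. */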
	if (tbase->rx_pkt == rx_pkt_hw) {
		port_id = tbase->rx_params_hw.last_read_portid + tbase->rx_params_hw.nb_rxports;
		port_id = (port_id - 1) % tbase->rx_params_hw.nb_rxports;
		port_id = tbase->rx_params_hw.rx_pq[port_id].port;
	}
	else if (tbase->rx_pkt == rx_pkt_hw1) {
		port_id = tbase->rx_params_hw1.rx_pq.port;
	}

	prefetch_first(mbufs, n_pkts);

	for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
		PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
		PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
		mbufs[j]->pkt.in_port = port_id;
#else
		mbufs[j]->port = port_id;
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#ifdef PROX_PREFETCH_OFFSET
	PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
	for (; j < n_pkts; ++j) {
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
		mbufs[j]->pkt.in_port = port_id;
#else
		mbufs[j]->port = port_id;
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#endif

	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}
struct qinq_packet {
	struct qinq_hdr qinq_hdr;
	union {
		struct ipv4_hdr ipv4_hdr;
		struct ipv6_hdr ipv6_hdr;
	};
} __attribute__((packed));

struct qinq_packet_data {
	struct ether_addr d_addr;
	struct ether_addr s_addr;
	uint64_t qinq; /* Both VLAN tags, read as a single 64-bit word for hashing */
} __attribute__((packed));

struct ether_packet {
	struct ether_hdr ether_hdr;
	union {
		struct ipv4_hdr ipv4_hdr;
		struct ipv6_hdr ipv6_hdr;
	};
} __attribute__((packed));

struct cpe_packet {
	union {
		struct qinq_packet qp;
		struct ether_packet ep;
		struct qinq_packet_data qd;
	};
};
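/* Note on the 0xFF0F masks used below: the TCI fields are big-endian on
   the wire, so when a TCI is read as a 16-bit value on a little-endian
   host, the 12-bit VLAN ID occupies bits 0-3 and 8-15 while the PCP/DEI
   bits occupy bits 4-7; masking with 0xFF0F therefore keeps only the
   VLAN ID. Likewise, 0xFF0F0000FF0F0000 applied to qd.qinq clears both
   TPIDs and both PCP/DEI fields, keeping the two 12-bit VLAN IDs. */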
static inline uint8_t get_worker(struct task_lb_qinq *task, struct cpe_packet *packet)
{
	uint8_t worker = 0;
	if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_HASH) {
		// Load balance on a CRC hash of the combined cvlan and svlan
		uint64_t qinq_net = packet->qd.qinq;
		qinq_net = qinq_net & 0xFF0F0000FF0F0000; // Mask out the TPID and QoS bits, keeping both VLAN IDs
		if (task->bit_mask != 0xff) {
			worker = rte_hash_crc(&qinq_net, 8, 0) & task->bit_mask;
		}
		else {
			worker = rte_hash_crc(&qinq_net, 8, 0) % task->nb_worker_threads;
		}
		plogx_dbg("Sending packet svlan=%x, cvlan=%x, pseudo_qinq=%lx to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq_net, worker);
	} else if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_RSS) {
		// Load balance on a Toeplitz (RSS) hash of the masked cvlan TCI
		uint32_t qinq = (packet->qp.qinq_hdr.cvlan.vlan_tci & 0xFF0F) << 16;
		uint32_t rss = toeplitz_hash((uint8_t *)&qinq, 4);
		if (task->bit_mask != 0xff) {
			worker = rss & task->bit_mask;
		} else {
			worker = (0x1ff & rss) % task->nb_worker_threads;
		}
		plogx_dbg("Sending packet svlan=%x, cvlan=%x, rss_input=%x, rss=%x to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq, rss, worker);
	} else {
		// Default: look the worker up in the table built from the workers' flows at init time
		uint16_t svlan = packet->qp.qinq_hdr.svlan.vlan_tci;
		uint16_t cvlan = packet->qp.qinq_hdr.cvlan.vlan_tci;
		prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
		worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];

		const size_t pos = offsetof(struct cpe_packet, qp.qinq_hdr.cvlan.vlan_tci);
		plogx_dbg("qinq = %u, worker = %u, pos = %lu\n", rte_be_to_cpu_16(cvlan), worker, pos);
	}
	return worker;
}
static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf)
{
	struct cpe_packet *packet = rte_pktmbuf_mtod(mbuf, struct cpe_packet *);
	if (packet->ep.ether_hdr.ether_type == ETYPE_IPv4) {
		if (unlikely((packet->ep.ipv4_hdr.version_ihl >> 4) != 4)) {
			plogx_err("Invalid version %u for ETYPE_IPv4\n", packet->ep.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		/* Use 24 bits from the IP; clients are in the 10.0.0.0/8 network */
		const uint32_t tmp = rte_bswap32(packet->ep.ipv4_hdr.src_addr) & 0x00FFFFFF;
		const uint32_t svlan = rte_bswap16(tmp >> 12);
		const uint32_t cvlan = rte_bswap16(tmp & 0x0FFF);
		prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
		uint8_t worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];
		return worker + IPV4 * task->nb_worker_threads;
	}
	else if (unlikely(packet->qp.qinq_hdr.svlan.eth_proto != task->qinq_tag)) {
		/* Might receive LLDP from the L2 switch... */
		if (packet->qp.qinq_hdr.svlan.eth_proto != ETYPE_LLDP) {
			plogdx_err(mbuf, "Invalid packet for LB in QinQ mode\n");
		}
		return OUT_DISCARD;
	}
	uint8_t worker = 0;
	uint8_t proto = 0xFF;
	switch (packet->qp.qinq_hdr.ether_type) {
	case ETYPE_IPv4: {
		if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 4)) {
			plogx_err("Invalid version %u for ETYPE_IPv4\n", packet->qp.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		worker = get_worker(task, packet);
		proto = IPV4;
		break;
	}
	case ETYPE_IPv6: {
		/* The version nibble shares the first byte of the union, so the
		   ipv4_hdr view is used to read it */
		if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 6)) {
			plogx_err("Invalid version %u for ETYPE_IPv6\n", packet->qp.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		/* Use the IP destination when load balancing IPv6 QinQ: byte 61
		   is the last byte of the IPv6 destination address */
		if (task->bit_mask != 0xff) {
			worker = ((uint8_t *)packet)[61] & task->bit_mask;
		}
		else {
			worker = ((uint8_t *)packet)[61] % task->nb_worker_threads;
		}
		proto = IPV6;
		break;
	}
	case ETYPE_ARP: {
		// We can only send to the ARP ring if it exists
		if (0 != (task->protocols_mask & (1 << ARP))) {
			proto = ARP;
		} else {
			proto = IPV4;
		}
		worker = get_worker(task, packet);
		break;
	}
	default:
		plogx_warn("Error in ETYPE_8021ad: ether_type = %#06x\n", packet->qp.qinq_hdr.ether_type);
		return OUT_DISCARD;
	}

	return worker + proto * task->nb_worker_threads;
}