// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_byteorder.h>
#include <rte_version.h>

#include "prox_malloc.h"
#include "task_base.h"
#include "task_init.h"
#include "tx_pkt.h"
#include "rx_pkt.h"
#include "etypes.h"
#include "log.h"
#include "quit.h"
#include "qinq.h"
#include "lconf.h"
#include "prefetch.h"
#include "defines.h"
#include "toeplitz.h"
#include "hash_utils.h"
#include "handle_lb_net.h"
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
#define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
#endif
/* Load balancing based on one byte: figure out what type of packet
   was received and, depending on the type, pass the packet to the
   correct worker thread. If an unsupported packet type is received,
   the packet is simply dropped. This load balancer can only handle
   QinQ packets (i.e. packets coming from the vCPE). */
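/* Note: the out index returned by handle_lb_qinq() below encodes both the
   destination worker and the protocol as worker + proto * nb_worker_threads,
   i.e. each worker is assumed to own one ring per protocol it handles. */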
int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
struct task_lb_qinq {
	struct task_base base;
	uint8_t *worker_table;
	uint8_t bit_mask;
	uint8_t protocols_mask;
	uint8_t nb_worker_threads;
	uint16_t qinq_tag;
};
static void init_task_lb_qinq(struct task_base *tbase, struct task_args *targ)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);

	task->qinq_tag = targ->qinq_tag;
	task->nb_worker_threads = targ->nb_worker_threads;
	task->bit_mask = rte_is_power_of_2(targ->nb_worker_threads) ? targ->nb_worker_threads - 1 : 0xff;
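	/* With a power-of-two worker count, "x % n" reduces to "x & (n - 1)"
	   (e.g. 4 workers -> mask 0x3); 0xff is a sentinel meaning "not a
	   power of two, fall back to %". */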
	/* The load distributor sends to a set of cores. These cores are
	   responsible for handling a set of flows identified by a QinQ
	   tag. The load distributor identifies the flows and forwards
	   them to the appropriate worker. The mapping from flow to
	   worker is stored in the worker_table. Build the worker_table
	   by asking each worker which flows it handles. */
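	/* 0x1000000 = 2^24 bytes: presumably one entry per (svlan, cvlan)
	   pair, the two 12-bit VLAN IDs being packed into a 24-bit index by
	   PKT_TO_LUTQINQ(). */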
	task->worker_table = prox_zmalloc(0x1000000, socket_id);
	for (int i = 0; i < targ->nb_worker_threads; ++i) {
		struct core_task ct = targ->core_task_set[0].core_task[i];
		struct task_args *t = core_targ_get(ct.core, ct.task);

		PROX_PANIC(t->task_init->flow_iter.beg == NULL,
			   "Load distributor can't find flows owned by destination worker %d\n", i);

		struct flow_iter *it = &t->task_init->flow_iter;

		for (it->beg(it, t); !it->is_end(it, t); it->next(it, t)) {
			uint16_t svlan = it->get_svlan(it, t);
			uint16_t cvlan = it->get_cvlan(it, t);

			task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)] = i;
		}
	}
	/* Check which protocols we are allowed to send to worker tasks */
	for (int i = 0; i < MAX_PROTOCOLS; ++i) {
		int is_active = !!targ->core_task_set[i].n_elems;
		task->protocols_mask |= is_active << i;
	}
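	/* Bit i of protocols_mask is set when at least one destination task
	   set exists for protocol i; the indices are assumed to be the
	   IPV4/ARP/IPV6 values used by handle_lb_qinq() below. */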
	plog_info("\t\ttask_lb_qinq protocols_mask = 0x%x\n", task->protocols_mask);

	if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_RSS)
		tbase->flags |= BASE_FLAG_LUT_QINQ_RSS;
	if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_HASH)
		tbase->flags |= BASE_FLAG_LUT_QINQ_HASH;
	plog_info("\t\ttask_lb_qinq flags = 0x%x\n", tbase->flags);
}

static struct task_init task_init_lb_qinq = {
	.mode_str = "lbqinq",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.size = sizeof(struct task_lb_qinq)
};

/*
	Add the correct port id to mbufs coming from a DPDK ring port in the load balancer.
	For the split-BNG using DPDK rings between the vSwitch and the VMs
	we need to know the port from which a packet was received.
	The ring PMD in DPDK does not update the port field in the mbuf,
	so we have no control over the port numbers that are being used.
	This submode allows the load balancer to set the port number on which it
	received the packet.
*/
static struct task_init task_init_lb_qinq_set_port = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_set_port",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk_set_port,
	.size = sizeof(struct task_lb_qinq)
};

/*
	Load balance on a hash of the combined cvlan and svlan tags.
*/
static struct task_init task_init_lb_qinq_hash_friend = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_hash_friend",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.flag_features = TASK_FEATURE_LUT_QINQ_HASH,
	.size = sizeof(struct task_lb_qinq)
};

/*
	Load balance on an RSS hash of the combined cvlan and svlan tags.
	This can be used to compare against hardware RSS implementations.
*/
static struct task_init task_init_lb_qinq_rss_friend = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_rss_friend",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.flag_features = TASK_FEATURE_LUT_QINQ_RSS,
	.size = sizeof(struct task_lb_qinq)
};

__attribute__((constructor)) static void reg_task_lb_qinq(void)
{
	reg_task(&task_init_lb_qinq);
	reg_task(&task_init_lb_qinq_hash_friend);
	reg_task(&task_init_lb_qinq_rss_friend);
	reg_task(&task_init_lb_qinq_set_port);
}

static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf);

int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t j;

	prefetch_first(mbufs, n_pkts);
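	/* Software-pipelined loop: while packet j is classified, the mbuf
	   struct PREFETCH_OFFSET packets ahead and its packet data are
	   prefetched to hide memory latency. The second loop below drains
	   the tail packets for which there is nothing left to prefetch. */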
	for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
		PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
		PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#ifdef PROX_PREFETCH_OFFSET
	PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
	for (; j < n_pkts; ++j) {
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#endif

	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}

int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t j;
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
	uint32_t port_id = mbufs[0]->pkt.in_port;
#else
	uint32_t port_id = mbufs[0]->port;
#endif

	if (tbase->rx_pkt == rx_pkt_hw) {
		port_id = tbase->rx_params_hw.last_read_portid + tbase->rx_params_hw.nb_rxports;
		port_id = (port_id - 1) % tbase->rx_params_hw.nb_rxports;
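		/* last_read_portid presumably points just past the queue this
		   burst came from; nb_rxports is added before subtracting 1 so
		   the unsigned arithmetic cannot wrap below zero. */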
		port_id = tbase->rx_params_hw.rx_pq[port_id].port;
	} else if (tbase->rx_pkt == rx_pkt_hw1) {
		port_id = tbase->rx_params_hw1.rx_pq.port;
	}

	prefetch_first(mbufs, n_pkts);
	for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
		PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
		PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
		mbufs[j]->pkt.in_port = port_id;
#else
		mbufs[j]->port = port_id;
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#ifdef PROX_PREFETCH_OFFSET
	PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
	for (; j < n_pkts; ++j) {
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
		mbufs[j]->pkt.in_port = port_id;
#else
		mbufs[j]->port = port_id;
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#endif

	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}
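
/* Three overlapping views of the same frame: qp for QinQ-tagged packets,
   ep for plain (untagged) Ethernet packets, and qd for reading both VLAN
   tags as a single raw 64-bit word. */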
struct qinq_packet {
	struct qinq_hdr qinq_hdr;
	union {
		struct ipv4_hdr ipv4_hdr;
		struct ipv6_hdr ipv6_hdr;
	};
} __attribute__((packed));

struct qinq_packet_data {
	struct ether_addr d_addr;
	struct ether_addr s_addr;
	uint64_t qinq;
} __attribute__((packed));

struct ether_packet {
	struct ether_hdr ether_hdr;
	union {
		struct ipv4_hdr ipv4_hdr;
		struct ipv6_hdr ipv6_hdr;
	};
} __attribute__((packed));

struct cpe_packet {
	union {
		struct qinq_packet qp;
		struct ether_packet ep;
		struct qinq_packet_data qd;
	};
};

static inline uint8_t get_worker(struct task_lb_qinq *task, struct cpe_packet *packet)
{
	uint8_t worker = 0;
	if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_HASH) {
		// Load balance on a hash of the combined cvlan and svlan tags
		uint64_t qinq_net = packet->qd.qinq;
		qinq_net = qinq_net & 0xFF0F0000FF0F0000; // mask out the TPID (protocol) and QoS bits
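		/* In host (little-endian) order this mask clears both TPID
		   words and the PCP/DEI nibble of each TCI, so only the two
		   12-bit VLAN IDs contribute to the hash. */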
		if (task->bit_mask != 0xff) {
			worker = hash_crc32(&qinq_net, 8, 0) & task->bit_mask;
		}
		else {
			worker = hash_crc32(&qinq_net, 8, 0) % task->nb_worker_threads;
		}

		plogx_dbg("Sending packet svlan=%x, cvlan=%x, pseudo_qinq=%lx to worker %d\n",
			  rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci),
			  rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci),
			  qinq_net, worker);
	} else if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_RSS) {
		// Load balance on an RSS (Toeplitz) hash; note that only the
		// cvlan TCI feeds the hash input here.
		uint32_t qinq = (packet->qp.qinq_hdr.cvlan.vlan_tci & 0xFF0F) << 16;
		uint32_t rss = toeplitz_hash((uint8_t *)&qinq, 4);
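		/* toeplitz_hash() is assumed to implement the same Toeplitz
		   function NICs use for RSS, so this software path should
		   spread flows the way a hardware RSS implementation would
		   (the point of the rss_friend submode above). */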
		if (task->bit_mask != 0xff) {
			worker = rss & task->bit_mask;
		}
		else {
			worker = (0x1ff & rss) % task->nb_worker_threads;
		}

		plogx_dbg("Sending packet svlan=%x, cvlan=%x, rss_input=%x, rss=%x to worker %d\n",
			  rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci),
			  rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci),
			  qinq, rss, worker);
	} else {
		uint16_t svlan = packet->qp.qinq_hdr.svlan.vlan_tci;
		uint16_t cvlan = packet->qp.qinq_hdr.cvlan.vlan_tci;
		prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
		worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];

		const size_t pos = offsetof(struct cpe_packet, qp.qinq_hdr.cvlan.vlan_tci);
		plogx_dbg("qinq = %u, worker = %u, pos = %lu\n", rte_be_to_cpu_16(cvlan), worker, pos);
	}
	return worker;
}

static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf)
{
	struct cpe_packet *packet = rte_pktmbuf_mtod(mbuf, struct cpe_packet *);
	if (packet->ep.ether_hdr.ether_type == ETYPE_IPv4) {
		if (unlikely((packet->ep.ipv4_hdr.version_ihl >> 4) != 4)) {
			plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->ep.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		/* use 24 bits from the IP, clients are from the 10.0.0.0/8 network */
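		/* The 24-bit host part is split 12/12: the upper 12 bits play
		   the role of the svlan and the lower 12 bits the cvlan, so the
		   same worker_table lookup also covers untagged IPv4 traffic. */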
		const uint32_t tmp = rte_bswap32(packet->ep.ipv4_hdr.src_addr) & 0x00FFFFFF;
		const uint32_t svlan = rte_bswap16(tmp >> 12);
		const uint32_t cvlan = rte_bswap16(tmp & 0x0FFF);
		prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
		uint8_t worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];
		return worker + IPV4 * task->nb_worker_threads;
	}
	else if (unlikely(packet->qp.qinq_hdr.svlan.eth_proto != task->qinq_tag)) {
		/* might receive LLDP from the L2 switch... */
		if (packet->qp.qinq_hdr.svlan.eth_proto != ETYPE_LLDP) {
			plogdx_err(mbuf, "Invalid packet for LB in QinQ mode\n");
		}
		return OUT_DISCARD;
	}

	uint8_t worker = 0;
	uint8_t proto = 0xFF;
	switch (packet->qp.qinq_hdr.ether_type) {
	case ETYPE_IPv4: {
		if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 4)) {
			plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->qp.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		worker = get_worker(task, packet);
		proto = IPV4;
		break;
	}
	case ETYPE_IPv6: {
		if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 6)) {
			plogx_err("Invalid Version %u for ETYPE_IPv6\n", packet->qp.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
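		/* The version nibble sits at the same offset in IPv4 and IPv6
		   headers, which is why the ipv4_hdr view can be reused here. */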

		/* Use IP destination when IPv6 QinQ */
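		/* Byte 61 should be the last byte of the IPv6 destination
		   address, assuming the standard 22-byte QinQ Ethernet header
		   (two MACs, two VLAN tags, EtherType) followed by the 40-byte
		   IPv6 header, whose destination address spans bytes 46..61. */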
		if (task->bit_mask != 0xff) {
			worker = ((uint8_t *)packet)[61] & task->bit_mask;
		}
		else {
			worker = ((uint8_t *)packet)[61] % task->nb_worker_threads;
		}
		proto = IPV6;
		break;
	}
	case ETYPE_ARP: {
		// We can only send to the ARP ring if it exists;
		// otherwise hand ARP packets to the IPv4 workers.
		if (0 != (task->protocols_mask & (1 << ARP))) {
			proto = ARP;
		}
		else {
			proto = IPV4;
		}
		worker = get_worker(task, packet);
		break;
	}
	default:
		plogx_warn("Error in ETYPE_8021ad: ether_type = %#06x\n", packet->qp.qinq_hdr.ether_type);
		return OUT_DISCARD;
	}

	return worker + proto * task->nb_worker_threads;
}