Renaming of defines for clarity
[samplevnf.git] / VNFs / DPPD-PROX / handle_lb_qinq.c
/*
// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include <string.h>

#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_byteorder.h>
#include <rte_version.h>
#include <rte_hash_crc.h>

#include "prox_malloc.h"
#include "task_base.h"
#include "tx_pkt.h"
#include "rx_pkt.h"
#include "etypes.h"
#include "log.h"
#include "quit.h"
#include "qinq.h"
#include "lconf.h"
#include "prefetch.h"
#include "defines.h"
#include "prox_cfg.h"
#include "hash_utils.h"
#include "handle_lb_net.h"
#include "toeplitz.h"

#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
#define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
#endif

/* Load balancing based on one byte: determine what type of packet
   was received and, depending on that type, pass the packet to the
   correct worker thread. If an unsupported packet type is received,
   the packet is simply dropped. This load balancer can only handle
   QinQ packets (i.e. packets coming from the vCPE). */
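
/* Illustration (not upstream code; the numbers are examples only): each
   handler below returns an out[] index per packet that encodes both the
   destination worker and the protocol as

           out = worker + proto * nb_worker_threads

   so with e.g. 4 worker threads, a packet of the protocol with index 2
   bound for worker 1 goes out on index 1 + 2 * 4 = 9, while OUT_DISCARD
   drops the packet. */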
int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);

struct task_lb_qinq {
        struct task_base        base;
        uint8_t                 *worker_table;
        uint8_t                 bit_mask;
        uint8_t                 protocols_mask;
        uint8_t                 nb_worker_threads;
        uint16_t                qinq_tag;
};

static void init_task_lb_qinq(struct task_base *tbase, struct task_args *targ)
{
        struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
        const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);

        task->qinq_tag = targ->qinq_tag;
        task->nb_worker_threads = targ->nb_worker_threads;
        /* A power-of-two worker count lets us mask instead of taking a modulo */
        task->bit_mask = rte_is_power_of_2(targ->nb_worker_threads) ? targ->nb_worker_threads - 1 : 0xff;

        /* The load distributor is sending to a set of cores. These
           cores are responsible for handling a set of flows
           identified by a QinQ tag. The load distributor identifies
           the flows and forwards them to the appropriate worker. The
           mapping from flow to worker is stored within the
           worker_table. Build the worker_table by asking each worker
           which flows it handles. */

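        /* A sketch of the indexing assumed here (the real PKT_TO_LUTQINQ()
           macro lives in defines.h and works on the network-byte-order
           fields read straight from the QinQ header): the table below
           spans 0x1000000 = 2^24 one-byte entries, one per
           (12-bit svlan ID, 12-bit cvlan ID) pair, i.e. conceptually

                   index = (svlan_id & 0xFFF) << 12 | (cvlan_id & 0xFFF);

           so looking up a worker is a single byte load. */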
        task->worker_table = prox_zmalloc(0x1000000, socket_id);
        for (int i = 0; i < targ->nb_worker_threads; ++i) {
                struct core_task ct = targ->core_task_set[0].core_task[i];
                struct task_args *t = core_targ_get(ct.core, ct.task);

                PROX_PANIC(t->task_init->flow_iter.beg == NULL,
                           "Load distributor can't find flows owned by destination worker %d\n", i);

                struct flow_iter *it = &t->task_init->flow_iter;

                for (it->beg(it, t); !it->is_end(it, t); it->next(it, t)) {
                        uint16_t svlan = it->get_svlan(it, t);
                        uint16_t cvlan = it->get_cvlan(it, t);

                        task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)] = i;
                }
        }

        /* Check which protocols we are allowed to send to worker tasks */
        for (int i = 0; i < MAX_PROTOCOLS; ++i) {
                int is_active = !!targ->core_task_set[i].n_elems;
                task->protocols_mask |= is_active << i;
        }
        plog_info("\t\ttask_lb_qinq protocols_mask = 0x%x\n", task->protocols_mask);

        if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_RSS)
                tbase->flags |= TBASE_FLAG_LUT_QINQ_RSS;
        if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_HASH)
                tbase->flags |= TBASE_FLAG_LUT_QINQ_HASH;
        plog_info("\t\ttask_lb_qinq flags = 0x%x\n", tbase->flags);
}

static struct task_init task_init_lb_qinq = {
        .mode_str = "lbqinq",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk,
        .size = sizeof(struct task_lb_qinq)
};

/*
        Add the correct port id to mbufs coming from a DPDK ring port in the
        load balancer. For the split-BNG, which uses DPDK rings between the
        vSwitch and the VMs, we need to know the port from which a packet
        was received. The ring PMD in DPDK does not update the port field
        in the mbuf, so we have no control over the port numbers that are
        being used. This submode allows the load balancer to set the port
        number on which it received the mbuf. A configuration sketch
        follows the struct below.
*/
static struct task_init task_init_lb_qinq_set_port = {
        .mode_str = "lbqinq",
        .sub_mode_str = "lut_qinq_set_port",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk_set_port,
        .size = sizeof(struct task_lb_qinq)
};
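
/* A configuration sketch for the submode above. All names, core numbers
   and port names here are illustrative only (refer to the PROX
   documentation and sample .cfg files for the authoritative syntax):

        [core 2]
        name=lb
        task=0
        mode=lbqinq
        sub mode=lut_qinq_set_port
        rx port=ring0     ; a DPDK ring-PMD port in this use case
        tx cores=(5-8)t0 proto=ipv4
        tx cores=(5-8)t0 proto=arp
*/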

/*
        Load balance on a hash of the cvlan and svlan combination.
*/
static struct task_init task_init_lb_qinq_hash_friend = {
        .mode_str = "lbqinq",
        .sub_mode_str = "lut_qinq_hash_friend",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk,
        .flag_features = TASK_FEATURE_LUT_QINQ_HASH,
        .size = sizeof(struct task_lb_qinq)
};

/*
        Load balance on the RSS (Toeplitz) hash of the cvlan and svlan
        combination. This can be used for comparison against HW RSS
        implementations.
*/
static struct task_init task_init_lb_qinq_rss_friend = {
        .mode_str = "lbqinq",
        .sub_mode_str = "lut_qinq_rss_friend",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk,
        .flag_features = TASK_FEATURE_LUT_QINQ_RSS,
        .size = sizeof(struct task_lb_qinq)
};
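
/* Worked example (derived from get_worker() below; numbers illustrative):
   with 8 workers, bit_mask == 0x7 and both "friend" submodes reduce their
   32-bit hash with a mask, e.g. worker = rte_hash_crc(&qinq_net, 8, 0) & 0x7.
   With 6 workers, bit_mask stays 0xff and a modulo (% 6) is used instead. */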

__attribute__((constructor)) static void reg_task_lb_qinq(void)
{
        reg_task(&task_init_lb_qinq);
        reg_task(&task_init_lb_qinq_hash_friend);
        reg_task(&task_init_lb_qinq_rss_friend);
        reg_task(&task_init_lb_qinq_set_port);
}
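
/* Because of the constructor attribute, reg_task_lb_qinq() above runs at
   program start-up, before main(), so all four mode/submode variants
   register themselves with the task framework without any explicit call. */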

static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf);

int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
        struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
        uint8_t out[MAX_PKT_BURST];
        uint16_t j;

        prefetch_first(mbufs, n_pkts);

        for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
                PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
                PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#ifdef PROX_PREFETCH_OFFSET
        PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
        for (; j < n_pkts; ++j) {
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#endif

        return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}
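
/* The loop above is software-pipelined (when PROX_PREFETCH_OFFSET is
   defined, e.g. to 3): while classifying packet j, it prefetches the mbuf
   struct PREFETCH_OFFSET positions ahead and the packet data one position
   behind that, so both are in cache by the time they are needed; the
   epilogue loop then drains the last PREFETCH_OFFSET packets without
   issuing further prefetches. */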

int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
        struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
        uint8_t out[MAX_PKT_BURST];
        uint16_t j;
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
        uint32_t port_id = mbufs[0]->pkt.in_port;
#else
        uint32_t port_id = mbufs[0]->port;
#endif

        if (tbase->rx_pkt == rx_pkt_hw) {
                /* Recover the port of the queue this burst was read from:
                   (last_read_portid - 1) modulo nb_rxports. */
                port_id = tbase->rx_params_hw.last_read_portid + tbase->rx_params_hw.nb_rxports;
                port_id = (port_id - 1) % tbase->rx_params_hw.nb_rxports;
                port_id = tbase->rx_params_hw.rx_pq[port_id].port;
        } else if (tbase->rx_pkt == rx_pkt_hw1) {
                port_id = tbase->rx_params_hw1.rx_pq.port;
        }

        prefetch_first(mbufs, n_pkts);

        for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
                PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
                PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
                mbufs[j]->pkt.in_port = port_id;
#else
                mbufs[j]->port = port_id;
#endif
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#ifdef PROX_PREFETCH_OFFSET
        PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
        for (; j < n_pkts; ++j) {
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
                mbufs[j]->pkt.in_port = port_id;
#else
                mbufs[j]->port = port_id;
#endif
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#endif

        return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}

struct qinq_packet {
        struct qinq_hdr qinq_hdr;
        union {
                prox_rte_ipv4_hdr ipv4_hdr;
                prox_rte_ipv6_hdr ipv6_hdr;
        };
} __attribute__((packed)) __attribute__((__aligned__(2)));

struct qinq_packet_data {
        prox_rte_ether_addr  d_addr;
        prox_rte_ether_addr  s_addr;
        uint64_t qinq;
} __attribute__((packed)) __attribute__((__aligned__(2)));
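
/* Layout note (cf. struct qinq_hdr in qinq.h): struct qinq_packet_data
   overlays the same bytes as struct qinq_packet, and its 64-bit "qinq"
   field covers the svlan and cvlan tags that follow the two 6-byte MAC
   addresses, i.e. both 2-byte TPIDs and both 2-byte TCIs. get_worker()
   below hashes this single 8-byte load. */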

struct ether_packet {
        prox_rte_ether_hdr ether_hdr;
        union {
                prox_rte_ipv4_hdr ipv4_hdr;
                prox_rte_ipv6_hdr ipv6_hdr;
        };
} __attribute__((packed)) __attribute__((__aligned__(2)));

struct cpe_packet {
        union {
                struct qinq_packet  qp;
                struct ether_packet ep;
                struct qinq_packet_data qd;
        };
};

static inline uint8_t get_worker(struct task_lb_qinq *task, struct cpe_packet *packet)
{
        uint8_t worker = 0;
        if (((struct task_base *)task)->flags & TBASE_FLAG_LUT_QINQ_HASH) {
                // Load balance on a CRC hash of the svlan and cvlan IDs. The
                // mask clears the TPID bytes and the PCP/DEI (QoS) bits,
                // keeping only the two 12-bit VLAN IDs.
                uint64_t qinq_net = packet->qd.qinq;
                qinq_net = qinq_net & 0xFF0F0000FF0F0000;
                if (task->bit_mask != 0xff) {
                        worker = rte_hash_crc(&qinq_net, 8, 0) & task->bit_mask;
                }
                else {
                        worker = rte_hash_crc(&qinq_net, 8, 0) % task->nb_worker_threads;
                }
                plogx_dbg("Sending packet svlan=%x, cvlan=%x, pseudo_qinq=%lx to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq_net, worker);
        } else if (((struct task_base *)task)->flags & TBASE_FLAG_LUT_QINQ_RSS) {
                // Load balance on the Toeplitz (RSS) hash of the cvlan ID
                // (only the cvlan TCI is fed into the hash here)
                uint32_t qinq = (packet->qp.qinq_hdr.cvlan.vlan_tci & 0xFF0F) << 16;
                uint32_t rss = toeplitz_hash((uint8_t *)&qinq, 4);
                if (task->bit_mask != 0xff) {
                        worker = rss & task->bit_mask;
                } else {
                        worker = (0x1ff & rss) % task->nb_worker_threads;
                }
                plogx_dbg("Sending packet svlan=%x, cvlan=%x, rss_input=%x, rss=%x to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq, rss, worker);
        } else {
                // Default: look the worker up in the (svlan, cvlan) table
                uint16_t svlan = packet->qp.qinq_hdr.svlan.vlan_tci;
                uint16_t cvlan = packet->qp.qinq_hdr.cvlan.vlan_tci;
                prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
                worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];

                const size_t pos = offsetof(struct cpe_packet, qp.qinq_hdr.cvlan.vlan_tci);
                plogx_dbg("qinq = %u, worker = %u, pos = %lu\n", rte_be_to_cpu_16(cvlan), worker, pos);
        }
        return worker;
}
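
/* Endianness note with a worked example (values illustrative): vlan_tci
   is stored big-endian in the packet, so a little-endian 16-bit load of
   TCI 0x2123 (PCP 1, VLAN ID 0x123) yields 0x2321. Masking with 0xFF0F
   clears the PCP/DEI nibble, giving 0x2301, and rte_bswap16() turns that
   back into the VLAN ID 0x0123; this is why both get_worker() above and
   its debug traces mask with 0xFF0F. */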

static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf)
{
        struct cpe_packet *packet = rte_pktmbuf_mtod(mbuf, struct cpe_packet*);
        if (packet->ep.ether_hdr.ether_type == ETYPE_IPv4) {
                if (unlikely((packet->ep.ipv4_hdr.version_ihl >> 4) != 4)) {
                        plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->ep.ipv4_hdr.version_ihl);
                        return OUT_DISCARD;
                }
                /* use 24 bits from the IP, clients are from the 10.0.0.0/8 network */
                const uint32_t tmp = rte_bswap32(packet->ep.ipv4_hdr.src_addr) & 0x00FFFFFF;
                const uint32_t svlan = rte_bswap16(tmp >> 12);
                const uint32_t cvlan = rte_bswap16(tmp & 0x0FFF);
                prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
                uint8_t worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];
                return worker + IPV4 * task->nb_worker_threads;
        }
        else if (unlikely(packet->qp.qinq_hdr.svlan.eth_proto != task->qinq_tag)) {
                /* might receive LLDP from the L2 switch... */
                if (packet->qp.qinq_hdr.svlan.eth_proto != ETYPE_LLDP) {
                        plogdx_err(mbuf, "Invalid packet for LB in QinQ mode\n");
                }
                return OUT_DISCARD;
        }
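
        /* Worked example for the untagged-IPv4 path above (address made
           up): src 10.1.35.69 is 0x0A012345, so tmp = 0x012345, the
           derived svlan ID is 0x012 (tmp >> 12) and the cvlan ID is
           0x345 (tmp & 0x0FFF), each byte-swapped back to network order
           before indexing the worker table. */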

        uint8_t worker = 0;
        uint8_t proto = 0xFF;
        switch (packet->qp.qinq_hdr.ether_type) {
        case ETYPE_IPv4: {
                if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 4)) {
                        plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->qp.ipv4_hdr.version_ihl);
                        return OUT_DISCARD;
                }
                worker = get_worker(task, packet);
                proto = IPV4;
                break;
        }
        case ETYPE_IPv6: {
                /* ipv4_hdr and ipv6_hdr overlap in the union and the
                   version nibble sits in the same byte for both, so this
                   effectively checks the IPv6 version field. */
                if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 6)) {
                        plogx_err("Invalid Version %u for ETYPE_IPv6\n", packet->qp.ipv4_hdr.version_ihl);
                        return OUT_DISCARD;
                }
                /* Use the IP destination when load balancing IPv6 QinQ */
                if (task->bit_mask != 0xff) {
                        worker = ((uint8_t *)packet)[61] & task->bit_mask;
                }
                else {
                        worker = ((uint8_t *)packet)[61] % task->nb_worker_threads;
                }
                proto = IPV6;
                break;
        }
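
        /* Byte 61 above is the last byte of the IPv6 destination address:
           22 bytes of QinQ Ethernet header (two MACs, two VLAN tags, one
           ethertype) + 8 bytes of fixed IPv6 header fields + 16 bytes of
           source address put the destination address at bytes 46-61. */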
        case ETYPE_ARP: {
                // We can only send to the ARP ring if it exists
                if (0 != (task->protocols_mask & (1 << ARP))) {
                        proto = ARP;
                } else {
                        proto = IPV4;
                }
                worker = get_worker(task, packet);
                break;
        }
        default:
                plogx_warn("Error in ETYPE_8021ad: ether_type = %#06x\n", packet->qp.qinq_hdr.ether_type);
                return OUT_DISCARD;
        }

        /* Encode the protocol and destination worker into a single out index */
        return worker + proto * task->nb_worker_threads;
}