Merge "Fix latency accuracy and dumping latencies to file"
[samplevnf.git] / VNFs / DPPD-PROX / handle_routing.c
1 /*
2 // Copyright (c) 2010-2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <rte_lpm.h>
18 #include <rte_cycles.h>
19 #include <string.h>
20 #include <rte_version.h>
21 #include <rte_ip.h>
22 #include <rte_byteorder.h>
23
24 #include "prox_lua.h"
25 #include "prox_lua_types.h"
26
27 #include "quit.h"
28 #include "log.h"
29 #include "handle_routing.h"
30 #include "tx_pkt.h"
31 #include "gre.h"
32 #include "lconf.h"
33 #include "prox_port_cfg.h"
34 #include "etypes.h"
35 #include "prefetch.h"
36 #include "hash_entry_types.h"
37 #include "mpls.h"
38 #include "qinq.h"
39 #include "prox_cfg.h"
40 #include "ip6_addr.h"
41 #include "prox_shared.h"
42 #include "prox_cksum.h"
43 #include "mbuf_utils.h"
44
45 struct task_routing {
46         struct task_base                base;
47         uint8_t                         runtime_flags;
48         struct lcore_cfg                *lconf;
49         struct rte_lpm                  *ipv4_lpm;
50         struct next_hop                 *next_hops;
51         int                             offload_crc;
52         uint32_t                        number_free_rules;
53         uint16_t                        qinq_tag;
54         uint32_t                        marking[4];
55         uint64_t                        src_mac[PROX_MAX_PORTS];
56 };
57
58 static void routing_update(struct task_base *tbase, void **data, uint16_t n_msgs)
59 {
60         struct task_routing *task = (struct task_routing *)tbase;
61         struct route_msg *msg;
62
63         for (uint16_t i = 0; i < n_msgs; ++i) {
64                 msg = (struct route_msg *)data[i];
65
66                 if (task->number_free_rules == 0) {
67                         plog_warn("Failed adding route: %u.%u.%u.%u/%u: lpm table full\n",
68                                 msg->ip_bytes[0], msg->ip_bytes[1], msg->ip_bytes[2],
69                                 msg->ip_bytes[3], msg->prefix);
70                 } else {
71                         if (rte_lpm_add(task->ipv4_lpm, rte_bswap32(msg->ip), msg->prefix, msg->nh)) {
72                                 plog_warn("Failed adding route: %u.%u.%u.%u/%u\n",
73                                         msg->ip_bytes[0], msg->ip_bytes[1], msg->ip_bytes[2],
74                                         msg->ip_bytes[3], msg->prefix);
75                         } else {
76                                 task->number_free_rules--;
77                         }
78                 }
79         }
80 }
81
82 static void init_task_routing(struct task_base *tbase, struct task_args *targ)
83 {
84         struct task_routing *task = (struct task_routing *)tbase;
85         const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
86         struct lpm4 *lpm;
87
88         task->lconf = targ->lconf;
89         task->qinq_tag = targ->qinq_tag;
90         task->runtime_flags = targ->runtime_flags;
91
92         PROX_PANIC(!strcmp(targ->route_table, ""), "route table not specified\n");
93         if (targ->flags & TASK_ARG_LOCAL_LPM) {
94                 int ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
95                 PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
96                 prox_sh_add_socket(socket_id, targ->route_table, lpm);
97
98                 task->number_free_rules = lpm->n_free_rules;
99         }
100         else {
101                 lpm = prox_sh_find_socket(socket_id, targ->route_table);
102                 if (!lpm) {
103                         int ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
104                         PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
105                         prox_sh_add_socket(socket_id, targ->route_table, lpm);
106                 }
107         }
108         task->ipv4_lpm = lpm->rte_lpm;
109         task->next_hops = lpm->next_hops;
110         task->number_free_rules = lpm->n_free_rules;
111
112         for (uint32_t i = 0; i < MAX_HOP_INDEX; i++) {
113                 int tx_port = task->next_hops[i].mac_port.out_idx;
114                 if ((tx_port > targ->nb_txports - 1) && (tx_port > targ->nb_txrings - 1)) {
115                         PROX_PANIC(1, "Routing Table contains port %d but only %d tx port/ %d ring:\n", tx_port, targ->nb_txports, targ->nb_txrings);
116                 }
117         }
118
119         if (targ->nb_txrings) {
120                 struct task_args *dtarg;
121                 struct core_task ct;
122                 for (uint32_t i = 0; i < targ->nb_txrings; ++i) {
123                         ct = targ->core_task_set[0].core_task[i];
124                         dtarg = core_targ_get(ct.core, ct.task);
125                         dtarg = find_reachable_task_sending_to_port(dtarg);
126                         if (task->runtime_flags & TASK_MPLS_TAGGING) {
127                                 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[dtarg->tx_port_queue[0].port].eth_addr))) | ((uint64_t)ETYPE_MPLSU << (64 - 16));
128                         } else {
129                                 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[dtarg->tx_port_queue[0].port].eth_addr))) | ((uint64_t)ETYPE_IPv4 << (64 - 16));
130                         }
131                 }
132         } else {
133                 for (uint32_t i = 0; i < targ->nb_txports; ++i) {
134                         if (task->runtime_flags & TASK_MPLS_TAGGING) {
135                                 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[targ->tx_port_queue[i].port].eth_addr))) | ((uint64_t)ETYPE_MPLSU << (64 - 16));
136                         } else {
137                                 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[targ->tx_port_queue[i].port].eth_addr))) | ((uint64_t)ETYPE_IPv4 << (64 - 16));
138                         }
139                 }
140         }
141
142         for (uint32_t i = 0; i < 4; ++i) {
143                 task->marking[i] = rte_bswap32(targ->marking[i] << 9);
144         }
145
146         struct prox_port_cfg *port = find_reachable_port(targ);
147         if (port) {
148                 task->offload_crc = port->capabilities.tx_offload_cksum;
149         }
150
151         targ->lconf->ctrl_func_m[targ->task] = routing_update;
152         targ->lconf->ctrl_timeout = freq_to_tsc(20);
153 }
154
155 static inline uint8_t handle_routing(struct task_routing *task, struct rte_mbuf *mbuf);
156
157 static int handle_routing_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
158 {
159         struct task_routing *task = (struct task_routing *)tbase;
160         uint8_t out[MAX_PKT_BURST];
161         uint16_t j;
162
163         prefetch_first(mbufs, n_pkts);
164
165         for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
166 #ifdef PROX_PREFETCH_OFFSET
167                 PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
168                 PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
169 #endif
170                 out[j] = handle_routing(task, mbufs[j]);
171         }
172 #ifdef PROX_PREFETCH_OFFSET
173         PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
174         for (; j < n_pkts; ++j) {
175                 out[j] = handle_routing(task, mbufs[j]);
176         }
177 #endif
178
179         return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
180 }
181
182 static void set_l2(struct task_routing *task, struct rte_mbuf *mbuf, uint8_t nh_idx)
183 {
184         struct ether_hdr *peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
185         *((uint64_t *)(&peth->d_addr)) = task->next_hops[nh_idx].mac_port_8bytes;
186         *((uint64_t *)(&peth->s_addr)) = task->src_mac[task->next_hops[nh_idx].mac_port.out_idx];
187 }
188
189 static void set_l2_mpls(struct task_routing *task, struct rte_mbuf *mbuf, uint8_t nh_idx, uint16_t l2_len)
190 {
191         struct ether_hdr *peth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, sizeof(struct mpls_hdr));
192         l2_len += sizeof(struct mpls_hdr);
193         prox_ip_cksum(mbuf, (struct ipv4_hdr *)((uint8_t *)peth + l2_len), l2_len, sizeof(struct ipv4_hdr), task->offload_crc);
194
195         *((uint64_t *)(&peth->d_addr)) = task->next_hops[nh_idx].mac_port_8bytes;
196         *((uint64_t *)(&peth->s_addr)) = task->src_mac[task->next_hops[nh_idx].mac_port.out_idx];
197         /* MPLSU ether_type written as high word of 64bit src_mac prepared by init_task_routing */
198         struct mpls_hdr *mpls = (struct mpls_hdr *)(peth + 1);
199
200         if (task->runtime_flags & TASK_MARK) {
201                   enum rte_meter_color color = rte_sched_port_pkt_read_color(mbuf);
202
203                 *(uint32_t *)mpls = task->next_hops[nh_idx].mpls | task->marking[color] | 0x00010000; // Set BoS to 1
204         }
205         else {
206                 *(uint32_t *)mpls = task->next_hops[nh_idx].mpls | 0x00010000; // Set BoS to 1
207         }
208 }
209
210 static uint8_t route_ipv4(struct task_routing *task, uint8_t *beg, uint32_t ip_offset, struct rte_mbuf *mbuf)
211 {
212         struct ipv4_hdr *ip = (struct ipv4_hdr*)(beg + ip_offset);
213         struct ether_hdr *peth_out;
214         uint8_t tx_port;
215         uint32_t dst_ip;
216
217         if (unlikely(ip->version_ihl >> 4 != 4)) {
218                 plog_warn("Offset: %d\n", ip_offset);
219                 plog_warn("Expected to receive IPv4 packet but IP version was %d\n",
220                         ip->version_ihl >> 4);
221                 return OUT_DISCARD;
222         }
223
224         switch(ip->next_proto_id) {
225         case IPPROTO_GRE: {
226                 struct gre_hdr *pgre = (struct gre_hdr *)(ip + 1);
227                 dst_ip = ((struct ipv4_hdr *)(pgre + 1))->dst_addr;
228                 break;
229         }
230         case IPPROTO_TCP:
231         case IPPROTO_UDP:
232                 dst_ip = ip->dst_addr;
233                 break;
234         default:
235                 /* Routing for other protocols is not implemented */
236                 return OUT_DISCARD;
237         }
238
239 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,1)
240         uint32_t next_hop_index;
241 #else
242         uint8_t next_hop_index;
243 #endif
244         if (unlikely(rte_lpm_lookup(task->ipv4_lpm, rte_bswap32(dst_ip), &next_hop_index) != 0)) {
245                 uint8_t* dst_ipp = (uint8_t*)&dst_ip;
246                 plog_warn("lpm_lookup failed for ip %d.%d.%d.%d: rc = %d\n",
247                         dst_ipp[0], dst_ipp[1], dst_ipp[2], dst_ipp[3], -ENOENT);
248                 return OUT_DISCARD;
249         }
250
251         tx_port = task->next_hops[next_hop_index].mac_port.out_idx;
252         if (task->runtime_flags & TASK_MPLS_TAGGING) {
253                 uint16_t padlen = rte_pktmbuf_pkt_len(mbuf) - rte_be_to_cpu_16(ip->total_length) - ip_offset;
254                 if (padlen) {
255                         rte_pktmbuf_trim(mbuf, padlen);
256                 }
257
258                 set_l2_mpls(task, mbuf, next_hop_index, ip_offset);
259         }
260         else {
261                 set_l2(task, mbuf, next_hop_index);
262         }
263         return tx_port;
264 }
265
266 static inline uint8_t handle_routing(struct task_routing *task, struct rte_mbuf *mbuf)
267 {
268         struct qinq_hdr *qinq;
269         struct ether_hdr *peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
270
271         switch (peth->ether_type) {
272         case ETYPE_8021ad: {
273                 struct qinq_hdr *qinq = (struct qinq_hdr *)peth;
274                 if ((qinq->cvlan.eth_proto != ETYPE_VLAN)) {
275                         plog_warn("Unexpected proto in QinQ = %#04x\n", qinq->cvlan.eth_proto);
276                         return OUT_DISCARD;
277                 }
278
279                 return route_ipv4(task, (uint8_t*)qinq, sizeof(*qinq), mbuf);
280         }
281         case ETYPE_IPv4:
282                 return route_ipv4(task, (uint8_t*)peth, sizeof(*peth), mbuf);
283         case ETYPE_MPLSU: {
284                 /* skip MPLS headers if any for routing */
285                 struct mpls_hdr *mpls = (struct mpls_hdr *)(peth + 1);
286                 uint32_t count = sizeof(struct ether_hdr);
287                 while (!(mpls->bytes & 0x00010000)) {
288                         mpls++;
289                         count += sizeof(struct mpls_hdr);
290                 }
291                 count += sizeof(struct mpls_hdr);
292
293                 return route_ipv4(task, (uint8_t*)peth, count, mbuf);
294         }
295         default:
296                 if (peth->ether_type == task->qinq_tag) {
297                         struct qinq_hdr *qinq = (struct qinq_hdr *)peth;
298                         if ((qinq->cvlan.eth_proto != ETYPE_VLAN)) {
299                                 plog_warn("Unexpected proto in QinQ = %#04x\n", qinq->cvlan.eth_proto);
300                                 return OUT_DISCARD;
301                         }
302
303                         return route_ipv4(task, (uint8_t*)qinq, sizeof(*qinq), mbuf);
304                 }
305                 plog_warn("Failed routing packet: ether_type %#06x is unknown\n", peth->ether_type);
306                 return OUT_DISCARD;
307         }
308 }
309
310 static struct task_init task_init_routing = {
311         .mode_str = "routing",
312         .init = init_task_routing,
313         .handle = handle_routing_bulk,
314         .flag_features = TASK_FEATURE_ROUTING,
315         .size = sizeof(struct task_routing)
316 };
317
318 __attribute__((constructor)) static void reg_task_routing(void)
319 {
320         reg_task(&task_init_routing);
321 }