ff00fb4f430e7872287f71bdb5aa20e7a053f170
[samplevnf.git] / VNFs / DPPD-PROX / packet_utils.c
1 /*
2 // Copyright (c) 2010-2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <rte_lcore.h>
18 #include <rte_hash.h>
19 #include <rte_hash_crc.h>
20 #include "task_base.h"
21 #include "lconf.h"
22 #include "prefetch.h"
23 #include "log.h"
24 #include "handle_master.h"
25 #include "prox_port_cfg.h"
26
27 static inline int find_ip(struct ether_hdr_arp *pkt, uint16_t len, uint32_t *ip_dst)
28 {
29         struct vlan_hdr *vlan_hdr;
30         struct ether_hdr *eth_hdr = (struct ether_hdr*)pkt;
31         struct ipv4_hdr *ip;
32         uint16_t ether_type = eth_hdr->ether_type;
33         uint16_t l2_len = sizeof(struct ether_hdr);
34
35         // Unstack VLAN tags
36         while (((ether_type == ETYPE_8021ad) || (ether_type == ETYPE_VLAN)) && (l2_len + sizeof(struct vlan_hdr) < len)) {
37                 vlan_hdr = (struct vlan_hdr *)((uint8_t *)pkt + l2_len);
38                 l2_len +=4;
39                 ether_type = vlan_hdr->eth_proto;
40         }
41
42         switch (ether_type) {
43         case ETYPE_MPLSU:
44         case ETYPE_MPLSM:
45                 // In case of MPLS, next hop MAC is based on MPLS, not destination IP
46                 l2_len = 0;
47                 break;
48         case ETYPE_IPv4:
49                 break;
50         case ETYPE_EoGRE:
51         case ETYPE_ARP:
52         case ETYPE_IPv6:
53                 l2_len = 0;
54                 break;
55         default:
56                 l2_len = 0;
57                 plog_warn("Unsupported packet type %x - CRC might be wrong\n", ether_type);
58                 break;
59         }
60
61         if (l2_len && (l2_len + sizeof(struct ipv4_hdr) <= len)) {
62                 struct ipv4_hdr *ip = (struct ipv4_hdr *)((uint8_t *)pkt + l2_len);
63                 // TODO: implement LPM => replace ip_dst by next hop IP DST
64                 *ip_dst = ip->dst_addr;
65                 return 0;
66         }
67         return -1;
68 }
69
70 int write_dst_mac(struct task_base *tbase, struct rte_mbuf *mbuf, uint32_t *ip_dst)
71 {
72         const uint64_t hz = rte_get_tsc_hz();
73         struct ether_hdr_arp *packet = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *);
74         struct ether_addr *mac = &packet->ether_hdr.d_addr;
75
76         uint64_t tsc = rte_rdtsc();
77         struct l3_base *l3 = &(tbase->l3);
78         if (l3->gw.ip) {
79                 if (likely((l3->flags & FLAG_DST_MAC_KNOWN) && (tsc < l3->gw.arp_update_time) && (tsc < l3->gw.arp_timeout))) {
80                         memcpy(mac, &l3->gw.mac, sizeof(struct ether_addr));
81                         return 0;
82                 } else if (tsc > l3->gw.arp_update_time) {
83                         // long time since we have sent an arp, send arp
84                         l3->gw.arp_update_time = tsc + hz;
85                         *ip_dst = l3->gw.ip;
86                         return -1;
87                 }
88                 return -2;
89         }
90
91         uint16_t len = rte_pktmbuf_pkt_len(mbuf);
92         if (find_ip(packet, len, ip_dst) != 0) {
93                 return 0;
94         }
95         if (likely(l3->n_pkts < 4)) {
96                 for (unsigned int idx = 0; idx < l3->n_pkts; idx++) {
97                         if (*ip_dst == l3->optimized_arp_table[idx].ip) {
98                                 if ((tsc < l3->optimized_arp_table[idx].arp_update_time) && (tsc < l3->optimized_arp_table[idx].arp_timeout)) {
99                                         memcpy(mac, &l3->optimized_arp_table[idx].mac, sizeof(struct ether_addr));
100                                         return 0;
101                                 } else if (tsc > l3->optimized_arp_table[idx].arp_update_time) {
102                                         l3->optimized_arp_table[idx].arp_update_time = tsc + hz;
103                                         return -1;
104                                 } else {
105                                         return -2;
106                                 }
107                         }
108                 }
109                 l3->optimized_arp_table[l3->n_pkts].ip = *ip_dst;
110                 l3->optimized_arp_table[l3->n_pkts].arp_update_time = tsc + hz;
111                 l3->n_pkts++;
112
113                 if (l3->n_pkts < 4)
114                         return -1;
115
116                 // We have ** many ** IP addresses; lets use hash table instead
117                 for (uint32_t idx = 0; idx < l3->n_pkts; idx++) {
118                         uint32_t ip = l3->optimized_arp_table[idx].ip;
119                         int ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
120                         if (ret < 0) {
121                                 plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip));
122                         } else {
123                                 memcpy(&l3->arp_table[ret], &l3->optimized_arp_table[idx], sizeof(struct arp_table));
124                         }
125                 }
126                 return -1;
127         } else {
128                 // Find mac in lookup table. Send ARP if not found
129                 int ret = rte_hash_lookup(l3->ip_hash, (const void *)ip_dst);
130                 if (unlikely(ret < 0)) {
131                         int ret = rte_hash_add_key(l3->ip_hash, (const void *)ip_dst);
132                         if (ret < 0) {
133                                 plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(*ip_dst));
134                                 return -2;
135                         } else {
136                                 l3->arp_table[ret].ip = *ip_dst;
137                                 l3->arp_table[ret].arp_update_time = tsc + hz;
138                         }
139                         return -1;
140                 } else {
141                         if ((tsc < l3->arp_table[ret].arp_update_time) && (tsc < l3->arp_table[ret].arp_timeout)) {
142                                 memcpy(mac, &l3->arp_table[ret].mac, sizeof(struct ether_addr));
143                                 return 0;
144                         } else if (tsc > l3->arp_table[ret].arp_update_time) {
145                                 l3->arp_table[ret].arp_update_time = tsc + hz;
146                                 return -1;
147                         } else {
148                                 return -2;
149                         }
150                 }
151         }
152         return 0;
153 }
154
155 void task_init_l3(struct task_base *tbase, struct task_args *targ)
156 {
157         static char hash_name[30];
158         uint32_t n_entries = MAX_ARP_ENTRIES * 4;
159         const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
160         sprintf(hash_name, "A%03d_%03d_mac_table", targ->lconf->id, targ->id);
161
162         hash_name[0]++;
163
164         struct rte_hash_parameters hash_params = {
165                 .name = hash_name,
166                 .entries = n_entries,
167                 .key_len = sizeof(uint32_t),
168                 .hash_func = rte_hash_crc,
169                 .hash_func_init_val = 0,
170         };
171         tbase->l3.ip_hash = rte_hash_create(&hash_params);
172         PROX_PANIC(tbase->l3.ip_hash == NULL, "Failed to set up ip hash table\n");
173
174         tbase->l3.arp_table = (struct arp_table *)prox_zmalloc(n_entries * sizeof(struct arp_table), socket_id);
175         PROX_PANIC(tbase->l3.arp_table == NULL, "Failed to allocate memory for %u entries in arp table\n", n_entries);
176         plog_info("\tarp table, with %d entries of size %ld\n", n_entries, sizeof(struct l3_base));
177
178         targ->lconf->ctrl_func_p[targ->task] = handle_ctrl_plane_pkts;
179         targ->lconf->ctrl_timeout = freq_to_tsc(targ->ctrl_freq);
180         tbase->l3.gw.ip = rte_cpu_to_be_32(targ->gateway_ipv4);
181         tbase->flags |= TASK_L3;
182         tbase->l3.core_id = targ->lconf->id;
183         tbase->l3.task_id = targ->id;
184         tbase->l3.tmaster = targ->tmaster;
185 }
186
187 void task_start_l3(struct task_base *tbase, struct task_args *targ)
188 {
189         struct prox_port_cfg *port = find_reachable_port(targ);
190         if (port) {
191                 tbase->l3.reachable_port_id = port - prox_port_cfg;
192                 if (targ->local_ipv4) {
193                         tbase->local_ipv4 = rte_be_to_cpu_32(targ->local_ipv4);
194                         register_ip_to_ctrl_plane(tbase->l3.tmaster, tbase->local_ipv4, tbase->l3.reachable_port_id, targ->lconf->id, targ->id);
195                 }
196         }
197 }
198
199 void task_set_gateway_ip(struct task_base *tbase, uint32_t ip)
200 {
201         tbase->l3.gw.ip = ip;
202         tbase->flags &= ~FLAG_DST_MAC_KNOWN;
203 }
204
205 void task_set_local_ip(struct task_base *tbase, uint32_t ip)
206 {
207         tbase->local_ipv4 = ip;
208 }
209
210 void handle_ctrl_plane_pkts(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
211 {
212         uint8_t out[1];
213         const uint64_t hz = rte_get_tsc_hz();
214         uint32_t ip, ip_dst, idx;
215         int j;
216         uint16_t command;
217         struct ether_hdr_arp *hdr;
218         struct l3_base *l3 = &tbase->l3;
219         uint64_t tsc= rte_rdtsc();
220
221         for (j = 0; j < n_pkts; ++j) {
222                 PREFETCH0(mbufs[j]);
223         }
224         for (j = 0; j < n_pkts; ++j) {
225                 PREFETCH0(rte_pktmbuf_mtod(mbufs[j], void *));
226         }
227
228         for (j = 0; j < n_pkts; ++j) {
229                 out[0] = OUT_HANDLED;
230                 command = mbufs[j]->udata64 & 0xFFFF;
231                 plogx_dbg("\tReceived %s mbuf %p\n", actions_string[command], mbufs[j]);
232                 switch(command) {
233                 case UPDATE_FROM_CTRL:
234                         hdr = rte_pktmbuf_mtod(mbufs[j], struct ether_hdr_arp *);
235                         ip = (mbufs[j]->udata64 >> 32) & 0xFFFFFFFF;
236
237                         if (ip == l3->gw.ip) {
238                                 // MAC address of the gateway
239                                 memcpy(&l3->gw.mac, &hdr->arp.data.sha, 6);
240                                 l3->flags |= FLAG_DST_MAC_KNOWN;
241                                 l3->gw.arp_timeout = tsc + 30 * hz;
242                         } else if (l3->n_pkts < 4) {
243                                 // Few packets tracked - should be faster to loop through them thean using a hash table
244                                 for (idx = 0; idx < l3->n_pkts; idx++) {
245                                         ip_dst = l3->optimized_arp_table[idx].ip;
246                                         if (ip_dst == ip)
247                                                 break;
248                                 }
249                                 if (idx < l3->n_pkts) {
250                                         // IP not found; this is a reply while we never asked for the request!
251                                         memcpy(&l3->optimized_arp_table[idx].mac, &(hdr->arp.data.sha), sizeof(struct ether_addr));
252                                         l3->optimized_arp_table[idx].arp_timeout = tsc + 30 * hz;
253                                 }
254                         } else {
255                                 int ret = rte_hash_add_key(l3->ip_hash, (const void *)&ip);
256                                 if (ret < 0) {
257                                         plogx_info("Unable add ip %d.%d.%d.%d in mac_hash\n", IP4(ip));
258                                 } else {
259                                         memcpy(&l3->arp_table[ret].mac, &(hdr->arp.data.sha), sizeof(struct ether_addr));
260                                         l3->arp_table[ret].arp_timeout = tsc + 30 * hz;
261                                 }
262                         }
263                         tx_drop(mbufs[j]);
264                         break;
265                 case ARP_REPLY_FROM_CTRL:
266                 case ARP_REQ_FROM_CTRL:
267                         TASK_STATS_ADD_TX_NON_DP(&tbase->aux->stats, 1);
268                         out[0] = 0;
269                         tbase->aux->tx_pkt_l2(tbase, &mbufs[j], 1, out);
270                         break;
271                 }
272         }
273 }