// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <rte_ip.h>
#include <rte_ether.h>
#include <rte_byteorder.h>
#include <rte_cycles.h>
#include <rte_hash.h>
#include <rte_lcore.h>
#include <rte_version.h>

#include "prox_malloc.h"
#include "task_init.h"
#include "lconf.h"
#include "prefetch.h"
#include "log.h"
#include "quit.h"
#include "defines.h"
#include "etypes.h"
#include "hash_entry_types.h"
#include "prox_cksum.h"
#include "prox_port_cfg.h"
#include "prox_assert.h"
#include "pkt_prototypes.h"
struct cpe_gre_key {
	struct ether_addr clt_mac;
	/* pad bytes take part in the hash, so they must stay zeroed */
	uint16_t          pad;
} __attribute__((__packed__));

struct cpe_gre_data {
	uint32_t gre_id;
	uint32_t cpe_ip;
	uint64_t tsc;
#ifdef GRE_TP
	uint64_t tp_tsc;
	double   tp_tbsize;
#endif
} __attribute__((__packed__));
struct task_gre_decap {
	struct task_base base;
	struct rte_hash *cpe_gre_hash;
	struct cpe_gre_data *cpe_gre_data;
	struct lcore_cfg *lconf;
	uint8_t runtime_flags;
	uint8_t mapping[PROX_MAX_PORTS];
	uint32_t bucket_index;
	int offload_crc;
	uint64_t cpe_timeout;
	const void *key_ptr[16];
	struct cpe_gre_key key[16];
#ifdef GRE_TP
	double cycles_per_byte;
	uint32_t tb_size;
#endif
};
static void handle_gre_decap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
static void handle_gre_encap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);

static inline uint8_t handle_gre_encap(struct task_gre_decap *task, struct rte_mbuf *mbuf, struct cpe_gre_data *table);
static inline void handle_gre_encap16(struct task_gre_decap *task, struct rte_mbuf **mbufs, uint16_t n_pkts, uint8_t *out);
static inline uint8_t handle_gre_decap(struct task_gre_decap *tbase, struct rte_mbuf *mbuf);

void update_arp_entries_gre(void *data);
static void init_cpe_gre_hash(struct task_args *targ)
{
	char name[64];
	uint8_t socket_id;
	uint8_t lcore_id;
	uint32_t table_part;

	/* Already set up by other task */
	if (targ->cpe_gre_hash) {
		return;
	}

	lcore_id = targ->lconf->id;
	socket_id = rte_lcore_to_socket_id(lcore_id);
	sprintf(name, "core_%u_CPE_GRE_Table", targ->lconf->id);
	table_part = targ->nb_slave_threads;

	if (table_part == 0) {
		table_part = 1;
	}
	if (!rte_is_power_of_2(table_part)) {
		table_part = rte_align32pow2(table_part) >> 1;
	}

	struct rte_hash_parameters hash_params = {
		.name = name,
		.entries = MAX_GRE / table_part,
		.bucket_entries = GRE_BUCKET_ENTRIES,
		.key_len = sizeof(struct cpe_gre_key),
		.hash_func_init_val = 0,
		.socket_id = socket_id
	};

	struct rte_hash *phash = rte_hash_create(&hash_params);
	/* one data entry per hash slot */
	struct cpe_gre_data *cpe_gre_data = prox_zmalloc(sizeof(struct cpe_gre_data) * (MAX_GRE / table_part), socket_id);

	PROX_PANIC(phash == NULL, "Unable to allocate memory for IPv4 hash table on core %u\n", lcore_id);
	PROX_PANIC(cpe_gre_data == NULL, "Unable to allocate memory for CPE GRE data on core %u\n", lcore_id);

	/* Share the hash table and data array with every GRE task on this core */
	for (uint8_t task_id = 0; task_id < targ->lconf->n_tasks_all; ++task_id) {
		enum task_mode smode = targ->lconf->targs[task_id].mode;
		if (smode == GRE_DECAP || smode == GRE_ENCAP) {
			targ->lconf->targs[task_id].cpe_gre_hash = phash;
			targ->lconf->targs[task_id].cpe_gre_data = cpe_gre_data;
		}
	}
}
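/* Worked example of the partitioning above: rte_align32pow2() rounds up
 * to the next power of two, so shifting right once yields the largest
 * power of two that does not exceed the thread count. If MAX_GRE were
 * 32768 (its value comes from the PROX headers) and nb_slave_threads = 5,
 * table_part becomes 8 >> 1 = 4 and each table holds 32768 / 4 = 8192
 * entries. The power-of-two constraint keeps the per-thread table sizes
 * aligned with the hash's internal bucket masking. */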
static void init_task_gre_decap(struct task_base *tbase, struct task_args *targ)
{
	struct task_gre_decap *task = (struct task_gre_decap *)tbase;

	init_cpe_gre_hash(targ);
	task->cpe_gre_hash = targ->cpe_gre_hash;
	task->cpe_gre_data = targ->cpe_gre_data;
	task->runtime_flags = targ->runtime_flags;
	task->lconf = targ->lconf;
	task->cpe_timeout = msec_to_tsc(targ->cpe_table_timeout_ms);

	targ->lconf->period_func = update_arp_entries_gre;
	targ->lconf->period_data = tbase;
	targ->lconf->period_timeout = msec_to_tsc(500) / NUM_VCPES;

	/* rte_hash_lookup_bulk() expects an array of pointers to keys */
	for (uint8_t i = 0; i < 16; ++i) {
		task->key_ptr[i] = &task->key[i];
	}
}
static void init_task_gre_encap(struct task_base *tbase, struct task_args *targ)
{
	struct task_gre_decap *task = (struct task_gre_decap *)tbase;

	init_cpe_gre_hash(targ);
	task->cpe_gre_hash = targ->cpe_gre_hash;
	task->cpe_gre_data = targ->cpe_gre_data;
	task->runtime_flags = targ->runtime_flags;
	task->lconf = targ->lconf;

	struct port_cfg *port = find_reachable_task_sending_to_port(targ);
	if (port) {
		task->offload_crc = port->capabilities.tx_offload_cksum;
	}

#ifdef GRE_TP
	if (targ->tb_rate) {
		task->cycles_per_byte = ((double)rte_get_tsc_hz()) / ((double)targ->tb_rate);
		task->tb_size = targ->tb_size != 0 ? targ->tb_size : 1520;
	}
	else {
		/* traffic policing disabled */
		task->cycles_per_byte = 0;
	}
#endif
}
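/* Example of the policing parameters above (illustrative numbers, only
 * compiled in with GRE_TP): with a 2.0 GHz TSC and tb_rate = 1250000000
 * bytes/s (10 Gb/s), cycles_per_byte = 2000000000 / 1250000000 = 1.6,
 * i.e. one byte of token budget is earned every 1.6 cycles. The default
 * tb_size of 1520 bytes allows one full-size Ethernet frame per burst. */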
static struct task_init task_init_gre_decap = {
	.mode = GRE_DECAP,
	.mode_str = "gredecap",
	.init = init_task_gre_decap,
	.handle = handle_gre_decap_bulk,
	.size = sizeof(struct task_gre_decap)
};

static struct task_init task_init_gre_encap = {
	.mode = GRE_ENCAP,
	.mode_str = "greencap",
	.init = init_task_gre_encap,
	.handle = handle_gre_encap_bulk,
	.size = sizeof(struct task_gre_decap)
};

__attribute__((constructor)) static void reg_task_gre(void)
{
	reg_task(&task_init_gre_decap);
	reg_task(&task_init_gre_encap);
}
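/* reg_task_gre() runs at load time, before main(), so both modes are
 * registered before the configuration is parsed. A task is then bound to
 * one of them by its mode string in the PROX config file, along these
 * lines (sketch; other required options omitted):
 *
 *	[core 1]
 *	task=0
 *	mode=gredecap
 */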
void handle_gre_decap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_gre_decap *task = (struct task_gre_decap *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t j;

	prefetch_first(mbufs, n_pkts);

	for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
		PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
		PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
		out[j] = handle_gre_decap(task, mbufs[j]);
	}
#ifdef PROX_PREFETCH_OFFSET
	PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
	for (; j < n_pkts; ++j) {
		out[j] = handle_gre_decap(task, mbufs[j]);
	}
#endif

	task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}
/* full encapsulated frame layout (the inner header depends on EoGRE vs
   IPv4-in-GRE) */
struct gre_packet {
	struct ether_hdr eth;
	struct ipv4_hdr ip;
	struct gre_hdr gre;
	union {
		struct ether_hdr eth2;
		struct ipv4_hdr ip2;
	};
} __attribute__((__packed__));
/* Handle IPv4 over GRE and Ethernet over GRE. For IPv4 over GRE,
   remove the GRE and IPv4 headers but retain space for an Ethernet
   header. For Ethernet over GRE, remove the outer Ethernet, GRE and
   IPv4 headers and return a pointer to the payload. */
static inline struct ether_hdr *gre_decap(struct gre_hdr *pgre, struct rte_mbuf *mbuf)
{
	int16_t hsize = 0;

	if (pgre->type == ETYPE_EoGRE) {
		hsize = sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + sizeof(struct gre_hdr);
	}
	else if (pgre->type == ETYPE_IPv4) {
		/* retain sizeof(struct ether_hdr) so a new Ethernet header
		   can be written in place in front of the inner IP packet */
		hsize = sizeof(struct ipv4_hdr) + sizeof(struct gre_hdr);
	}
	else {
		return NULL;
	}

	return (struct ether_hdr *)rte_pktmbuf_adj(mbuf, hsize);
}
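/* Size arithmetic for the two decap cases, assuming the usual header
 * sizes (14-byte ether_hdr, 20-byte option-less ipv4_hdr, 8-byte keyed
 * gre_hdr as used by this task):
 * - EoGRE:        14 + 20 + 8 = 42 bytes are stripped and the inner
 *                 Ethernet frame starts at the returned pointer;
 * - IPv4-in-GRE:  only 20 + 8 = 28 bytes are stripped, leaving 14 bytes
 *                 in front of the inner IP header that the caller
 *                 overwrites with a synthetic Ethernet header. */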
static inline uint8_t handle_gre_decap(struct task_gre_decap *task, struct rte_mbuf *mbuf)
{
	struct ipv4_hdr *pip = (struct ipv4_hdr *)(rte_pktmbuf_mtod(mbuf, struct ether_hdr *) + 1);

	if (pip->next_proto_id != IPPROTO_GRE) {
		plog_warn("Invalid packet proto_id = 0x%x expect 0x%x\n",
			  pip->next_proto_id, IPPROTO_GRE);
		return OUT_DISCARD;
	}

	struct cpe_gre_data data;
	struct cpe_gre_key key;
	struct gre_hdr *pgre = (struct gre_hdr *)(pip + 1);
	data.gre_id = pgre->gre_id;
	data.cpe_ip = pip->src_addr;

	struct ether_hdr *peth = gre_decap(pgre, mbuf);
	PROX_PANIC(peth == NULL, "Failed to gre_decap");

	pip = (struct ipv4_hdr *)(peth + 1);

	/* emulate client MAC for test purposes: derive the source MAC
	   from the inner source IP so each CPE gets a stable address */
	if (pgre->type == ETYPE_IPv4) {
		struct ether_hdr eth = {
			.d_addr = {.addr_bytes =
				   {0x0A, 0x02, 0x0A, 0x0A, 0x00, 0x01}},
			.s_addr = {.addr_bytes =
				   {0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
			.ether_type = ETYPE_IPv4
		};
		uint32_t hip = rte_bswap32(pip->src_addr);
		eth.s_addr.addr_bytes[2] = (hip >> 24) & 0xFF;
		eth.s_addr.addr_bytes[3] = (hip >> 16) & 0xFF;
		eth.s_addr.addr_bytes[4] = (hip >> 8) & 0xFF;
		eth.s_addr.addr_bytes[5] = (hip) & 0xFF;
		rte_memcpy(peth, &eth, sizeof(struct ether_hdr));
	}
	key.pad = 0;
	ether_addr_copy(&peth->s_addr, &key.clt_mac);

	data.tsc = rte_rdtsc() + task->cpe_timeout;

	int32_t hash_index = rte_hash_add_key(task->cpe_gre_hash, &key);
	if (unlikely(hash_index < 0)) {
		plog_warn("Failed to add key, gre %x\n", data.gre_id);
		return OUT_DISCARD;
	}
	else if (unlikely(hash_index >= MAX_GRE)) {
		plog_warn("Failed to add: Invalid hash_index = 0x%x\n",
			  hash_index);
		return OUT_DISCARD;
	}
	rte_memcpy(&task->cpe_gre_data[hash_index], &data, sizeof(data));
	if (task->runtime_flags & TASK_TX_CRC) {
		prox_ip_cksum(mbuf, pip, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), task->offload_crc);
	}

	return 0;
}
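/* Note on the table layout: rte_hash_add_key() returns the slot position
 * of the key inside the hash table, and that position doubles as a direct
 * index into the cpe_gre_data[] array. The encap path performs the mirror
 * operation: rte_hash_lookup_bulk() on the client MAC returns the same
 * position, from which gre_id and cpe_ip are read to rebuild the tunnel. */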
void handle_gre_encap_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_gre_decap *task = (struct task_gre_decap *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t done = 0;

	/* process the burst in chunks of at most 16 packets, the size of
	   the key/lookup arrays consumed by handle_gre_encap16() */
	while (n_pkts) {
		uint16_t chopped = RTE_MIN(n_pkts, 16);
		prefetch_pkts(mbufs, chopped);
		handle_gre_encap16(task, mbufs, chopped, out + done);
		mbufs += chopped;
		n_pkts -= chopped;
		done += chopped;
	}

	task->base.tx_pkt(&task->base, mbufs - done, done, out);
}
#define DO_ENC_ETH_OVER_GRE 1
#define DO_ENC_IP_OVER_GRE  0
static inline void handle_gre_encap16(struct task_gre_decap *task, struct rte_mbuf **mbufs, uint16_t n_pkts, uint8_t *out)
{
	for (uint8_t i = 0; i < n_pkts; ++i) {
		struct ether_hdr *peth = rte_pktmbuf_mtod(mbufs[i], struct ether_hdr *);
		ether_addr_copy(&peth->d_addr, &task->key[i].clt_mac);
	}

	int32_t hash_index[16];
	rte_hash_lookup_bulk(task->cpe_gre_hash, task->key_ptr, n_pkts, hash_index);
	for (uint8_t i = 0; i < n_pkts; ++i) {
		if (unlikely(hash_index[i] < 0)) {
			plog_warn("Invalid hash_index (<0) = 0x%x\n", hash_index[i]);
			out[i] = OUT_DISCARD;
		}
		else if (unlikely(hash_index[i] >= MAX_GRE)) {
			plog_warn("Invalid hash_index = 0x%x\n", hash_index[i]);
			out[i] = OUT_DISCARD;
		}
		else {
			/* only prefetch valid entries and pre-mark the packet
			   as forwardable, so out[] is never read uninitialized
			   in the loop below */
			out[i] = 0;
			rte_prefetch0(&task->cpe_gre_data[hash_index[i]]);
		}
	}

	for (uint8_t i = 0; i < n_pkts; ++i) {
		if (likely(out[i] != OUT_DISCARD)) {
			out[i] = handle_gre_encap(task, mbufs[i], &task->cpe_gre_data[hash_index[i]]);
		}
	}
}
/* use #if, not #ifdef: both macros are always defined, only their values
   select the encapsulation mode */
#if DO_ENC_ETH_OVER_GRE
#define PKT_PREPEND_LEN (sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + sizeof(struct gre_hdr))
#elif DO_ENC_IP_OVER_GRE
#define PKT_PREPEND_LEN (sizeof(struct ipv4_hdr) + sizeof(struct gre_hdr))
#endif
static inline uint8_t handle_gre_encap(struct task_gre_decap *task, struct rte_mbuf *mbuf, struct cpe_gre_data *table)
{
	struct ether_hdr *peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
	struct ipv4_hdr *pip = (struct ipv4_hdr *)(peth + 1);
	uint16_t ip_len = rte_be_to_cpu_16(pip->total_length);

	/* kept for symmetry with the decap path; the bulk lookup in
	   handle_gre_encap16() has already resolved the table entry */
	struct cpe_gre_key key;
	ether_addr_copy(&peth->d_addr, &key.clt_mac);
#ifdef GRE_TP
	/* policing enabled: each CPE entry holds a token bucket that refills
	   at tb_rate bytes per second, up to tb_size bytes */
	if (task->cycles_per_byte) {
		const uint16_t pkt_size = rte_pktmbuf_pkt_len(mbuf) + ETHER_CRC_LEN;
		uint64_t tsc_now = rte_rdtsc();
		if (table->tp_tbsize < pkt_size) {
			uint64_t cycles_diff = tsc_now - table->tp_tsc;
			double dB = ((double)cycles_diff) / task->cycles_per_byte;
			if (dB > (double)task->tb_size) {
				dB = task->tb_size;
			}
			if ((table->tp_tbsize + dB) >= pkt_size) {
				table->tp_tbsize += dB;
				table->tp_tsc = tsc_now;
			}
			else {
				TASK_STATS_ADD_DROP_DISCARD(&task->base.aux->stats, 1);
				return OUT_DISCARD;
			}
		}
		table->tp_tbsize -= pkt_size;
	}
#endif /* GRE_TP */
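	/* Worked example of the refill logic above (illustrative numbers):
	 * with cycles_per_byte = 1.6 and tb_size = 1520, a 1022-byte frame
	 * (pkt_size includes ETHER_CRC_LEN) arriving when tp_tbsize = 500
	 * after 1600 idle cycles earns dB = 1600 / 1.6 = 1000 bytes, capped
	 * at tb_size. Since 500 + 1000 >= 1022, the bucket is refilled and
	 * the frame passes; otherwise it is counted as a discard. */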
	/* reuse the Ethernet header from the payload, retaining the payload
	   (IP) in case of DO_ENC_IP_OVER_GRE */
	peth = (struct ether_hdr *)rte_pktmbuf_prepend(mbuf, PKT_PREPEND_LEN);
	ip_len += PKT_PREPEND_LEN;

	pip = (struct ipv4_hdr *)(peth + 1);
	struct gre_hdr *pgre = (struct gre_hdr *)(pip + 1);

	struct ether_hdr eth = {
		.d_addr = {.addr_bytes = {0x0A, 0x0A, 0x0A, 0xC8, 0x00, 0x02}},
		.s_addr = {.addr_bytes = {0x0A, 0x0A, 0x0A, 0xC8, 0x00, 0x01}},
		.ether_type = ETYPE_IPv4
	};
	rte_memcpy(peth, &eth, sizeof(struct ether_hdr));

	rte_memcpy(pgre, &gre_hdr_proto, sizeof(struct gre_hdr));
#if DO_ENC_ETH_OVER_GRE
	pgre->type = ETYPE_EoGRE;
#elif DO_ENC_IP_OVER_GRE
	pgre->type = ETYPE_IPv4;
#endif
	pgre->gre_id = table->gre_id;

	rte_memcpy(pip, &tunnel_ip_proto, sizeof(struct ipv4_hdr));
	pip->src_addr = 0x02010a0a; /* emulate port IP: 10.10.1.2 in wire order */
	pip->dst_addr = table->cpe_ip;
	pip->total_length = rte_cpu_to_be_16(ip_len);

	if (task->runtime_flags & TASK_TX_CRC) {
		prox_ip_cksum(mbuf, pip, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), task->offload_crc);
	}

	return 0;
}
void update_arp_entries_gre(void *data)
{
	uint64_t cur_tsc = rte_rdtsc();
	struct task_gre_decap *task = (struct task_gre_decap *)data;
#if RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0)
	/* On DPDK >= 2.1, struct rte_hash is opaque, so the direct table walk
	   below is no longer possible. rte_hash_iterate() might take a long
	   time when few entries are present, so it should not be used here
	   => not implemented for these DPDK versions. */
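	/* A sketch of what this could look like using only the public API of
	 * newer DPDK, assuming expired entries should simply be deleted (kept
	 * disabled because of the full-scan cost noted above):
	 *
	 *	uint32_t iter = 0;
	 *	const void *key;
	 *	void *entry;
	 *	int32_t idx;
	 *	while ((idx = rte_hash_iterate(task->cpe_gre_hash, &key,
	 *				       &entry, &iter)) >= 0) {
	 *		if (task->cpe_gre_data[idx].tsc < cur_tsc)
	 *			rte_hash_del_key(task->cpe_gre_hash, key);
	 *	}
	 */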
#else
	hash_sig_t *sig_bucket = (hash_sig_t *)&(task->cpe_gre_hash->sig_tbl[task->bucket_index * task->cpe_gre_hash->sig_tbl_bucket_size]);
	uint32_t table_index = task->bucket_index * task->cpe_gre_hash->bucket_entries;

	/* expire one bucket per call; the periodic timeout set at init is
	   sized so the whole table is swept roughly every 500 ms */
	for (uint32_t pos = 0; pos < task->cpe_gre_hash->bucket_entries; ++pos, ++table_index) {
		if (task->cpe_gre_data[table_index].tsc < cur_tsc) {
			/* zero the stored signature so lookups no longer match */
			sig_bucket[pos] = 0;
			task->cpe_gre_data[table_index].tsc = UINT64_MAX;
		}
	}
	++task->bucket_index;
	task->bucket_index &= task->cpe_gre_hash->bucket_bitmask;
#endif
}