Adding PROX (Packet pROcessing eXecution engine) VNF to sampleVNF
[samplevnf.git] / VNFs / DPPD-PROX / handle_lb_qinq.c
/*
// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include <string.h>

#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_byteorder.h>
#include <rte_version.h>

#include "prox_malloc.h"
#include "task_base.h"
#include "tx_pkt.h"
#include "rx_pkt.h"
#include "etypes.h"
#include "log.h"
#include "quit.h"
#include "qinq.h"
#include "lconf.h"
#include "prefetch.h"
#include "defines.h"
#include "prox_cfg.h"
#include "hash_utils.h"
#include "handle_lb_net.h"
#include "toeplitz.h"

#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
#define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
#endif

/* Load balancing based on one byte: figure out what type of packet
   is passed and, depending on the type, pass the packet to the correct
   worker thread. If an unsupported packet type is received, the packet is
   simply dropped. This load balancer can only handle QinQ packets
   (i.e. packets coming from the vCPE). */
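/* The per-packet classification result is an output index of the form
   worker + proto * nb_worker_threads (see handle_lb_qinq() below). For
   example, with 4 worker threads an IPv4 packet mapped to worker 2 leaves
   on output 2 + IPV4 * 4; unsupported packets return OUT_DISCARD. */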
int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);

struct task_lb_qinq {
        struct task_base        base;
        uint8_t                 *worker_table;
        uint8_t                 bit_mask;
        uint8_t                 protocols_mask;
        uint8_t                 nb_worker_threads;
        uint16_t                qinq_tag;
};

static void init_task_lb_qinq(struct task_base *tbase, struct task_args *targ)
{
        struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
        const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);

        task->qinq_tag = targ->qinq_tag;
        task->nb_worker_threads = targ->nb_worker_threads;
        task->bit_mask = rte_is_power_of_2(targ->nb_worker_threads) ? targ->nb_worker_threads - 1 : 0xff;
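        /* When the worker count is a power of two, bit_mask lets get_worker()
           replace the modulo with a cheaper AND: e.g. 4 workers -> bit_mask = 0x3,
           so "hash & 0x3" selects the worker. Otherwise bit_mask stays 0xff and
           "hash % nb_worker_threads" is used instead. */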

        /* The load distributor sends to a set of cores. These cores are
           responsible for handling a set of flows identified by a QinQ
           tag. The load distributor identifies the flows and forwards
           them to the appropriate worker. The mapping from flow to
           worker is stored in the worker_table. Build the worker_table
           by asking each worker which flows it handles. */
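        /* The table is indexed by PKT_TO_LUTQINQ(svlan, cvlan), which (per the
           macro in defines.h) presumably combines the two 12-bit VLAN IDs into
           a 24-bit index; hence the 0x1000000-byte (16 MiB) allocation below,
           one worker byte per possible flow. */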

        task->worker_table = prox_zmalloc(0x1000000, socket_id);
        for (int i = 0; i < targ->nb_worker_threads; ++i) {
                struct core_task ct = targ->core_task_set[0].core_task[i];
                struct task_args *t = core_targ_get(ct.core, ct.task);

                PROX_PANIC(t->task_init->flow_iter.beg == NULL,
                           "Load distributor can't find flows owned by destination worker %d\n", i);

                struct flow_iter *it = &t->task_init->flow_iter;

                int cnt = 0;
                for (it->beg(it, t); !it->is_end(it, t); it->next(it, t)) {
                        uint16_t svlan = it->get_svlan(it, t);
                        uint16_t cvlan = it->get_cvlan(it, t);

                        task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)] = i;
                }

        }

        /* Check which protocols we are allowed to send to worker tasks */
        for (int i = 0; i < MAX_PROTOCOLS; ++i) {
                int is_active = !!targ->core_task_set[i].n_elems;
                task->protocols_mask |= is_active << i;
        }
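        /* Each bit set in protocols_mask means a destination ring exists for the
           protocol with that index; handle_lb_qinq() later tests it, e.g.
           (protocols_mask & (1 << ARP)), to decide whether ARP traffic can be
           forwarded as ARP or must be folded onto the IPv4 destination. */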
        plog_info("\t\ttask_lb_qinq protocols_mask = 0x%x\n", task->protocols_mask);

        if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_RSS)
                tbase->flags |= BASE_FLAG_LUT_QINQ_RSS;
        if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_HASH)
                tbase->flags |= BASE_FLAG_LUT_QINQ_HASH;
        plog_info("\t\ttask_lb_qinq flags = 0x%x\n", tbase->flags);
}

static struct task_init task_init_lb_qinq = {
        .mode_str = "lbqinq",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk,
        .size = sizeof(struct task_lb_qinq)
};

/*
        Add the correct port id to mbufs coming from a DPDK ring port in the load balancer.
        For the split-BNG, which uses DPDK rings between the vSwitch and the VMs,
        we need to know the port from which a packet was received.
        The ring PMD in DPDK does not update the port field in the mbuf,
        so we have no control over the port numbers that are being used.
        This submode allows the load balancer to set the port number on which it
        received the mbuf.
*/
static struct task_init task_init_lb_qinq_set_port = {
        .mode_str = "lbqinq",
        .sub_mode_str = "lut_qinq_set_port",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk_set_port,
        .size = sizeof(struct task_lb_qinq)
};

/*
        Load Balance on Hash of combination of cvlan and svlan
*/
static struct task_init task_init_lb_qinq_hash_friend = {
        .mode_str = "lbqinq",
        .sub_mode_str = "lut_qinq_hash_friend",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk,
        .flag_features = TASK_FEATURE_LUT_QINQ_HASH,
        .size = sizeof(struct task_lb_qinq)
};

/*
        Load Balance on rss of combination of cvlan and svlan.
        This could be used to compare with HW implementations.
*/
static struct task_init task_init_lb_qinq_rss_friend = {
        .mode_str = "lbqinq",
        .sub_mode_str = "lut_qinq_rss_friend",
        .init = init_task_lb_qinq,
        .handle = handle_lb_qinq_bulk,
        .flag_features = TASK_FEATURE_LUT_QINQ_RSS,
        .size = sizeof(struct task_lb_qinq)
};
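/*
        Illustrative sketch only (not part of this file): in a PROX configuration
        file the modes registered below are typically selected per task with
        entries along these lines. The exact key names and values are an
        assumption here; check the shipped config examples for the real syntax.

        [core 1]
        task=0
        mode=lbqinq
        sub mode=lut_qinq_rss_friend
*/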

__attribute__((constructor)) static void reg_task_lb_qinq(void)
{
        reg_task(&task_init_lb_qinq);
        reg_task(&task_init_lb_qinq_hash_friend);
        reg_task(&task_init_lb_qinq_rss_friend);
        reg_task(&task_init_lb_qinq_set_port);
}

static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf);

int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
        struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
        uint8_t out[MAX_PKT_BURST];
        uint16_t j;

        prefetch_first(mbufs, n_pkts);

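        /* Software prefetch pipeline: each iteration prefetches the mbuf that is
           PREFETCH_OFFSET packets ahead (and the packet data one slot behind it)
           while classifying the current packet; the trailing loop inside the
           #ifdef drains the final packets that were only prefetched. */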
        for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
                PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
                PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#ifdef PROX_PREFETCH_OFFSET
        PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
        for (; j < n_pkts; ++j) {
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#endif

        return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}

int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
        struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
        uint8_t out[MAX_PKT_BURST];
        uint16_t j;
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
        uint32_t port_id = mbufs[0]->pkt.in_port;
#else
        uint32_t port_id = mbufs[0]->port;
#endif

        if (tbase->rx_pkt == rx_pkt_hw) {
                port_id = tbase->rx_params_hw.last_read_portid + tbase->rx_params_hw.nb_rxports;
                port_id = (port_id - 1) % tbase->rx_params_hw.nb_rxports;
                port_id = tbase->rx_params_hw.rx_pq[port_id].port;
        } else if (tbase->rx_pkt == rx_pkt_hw1) {
                port_id = tbase->rx_params_hw1.rx_pq.port;
        }
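        /* For multi-queue hardware rx, last_read_portid has presumably already
           been advanced to the next queue, so (last_read_portid + nb_rxports - 1)
           % nb_rxports recovers the queue the current burst was read from and
           rx_pq[] maps it back to a DPDK port id; for a single rx queue the port
           is taken from rx_params_hw1 directly. */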

        prefetch_first(mbufs, n_pkts);

        for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
                PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
                PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
                mbufs[j]->pkt.in_port = port_id;
#else
                mbufs[j]->port = port_id;
#endif
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#ifdef PROX_PREFETCH_OFFSET
        PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
        for (; j < n_pkts; ++j) {
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
                mbufs[j]->pkt.in_port = port_id;
#else
                mbufs[j]->port = port_id;
#endif
                out[j] = handle_lb_qinq(task, mbufs[j]);
        }
#endif

        return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}

struct qinq_packet {
        struct qinq_hdr qinq_hdr;
        union {
                struct ipv4_hdr ipv4_hdr;
                struct ipv6_hdr ipv6_hdr;
        };
} __attribute__((packed));

struct qinq_packet_data {
        struct ether_addr  d_addr;
        struct ether_addr  s_addr;
        uint64_t qinq;
} __attribute__((packed));

struct ether_packet {
        struct ether_hdr ether_hdr;
        union {
                struct ipv4_hdr ipv4_hdr;
                struct ipv6_hdr ipv6_hdr;
        };
} __attribute__((packed));

struct cpe_packet {
        union {
                struct qinq_packet  qp;
                struct ether_packet ep;
                struct qinq_packet_data qd;
        };
};

static inline uint8_t get_worker(struct task_lb_qinq *task, struct cpe_packet *packet)
{
        uint8_t worker = 0;
        if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_HASH) {
                // Load Balance on Hash of combination of cvlan and svlan
                uint64_t qinq_net = packet->qd.qinq;
                qinq_net = qinq_net & 0xFF0F0000FF0F0000;       // Mask Proto and QoS bits
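                /* The vlan_tci fields are big-endian; read into a host word on a
                   little-endian machine, the 0xFF0F pattern keeps the 12-bit VLAN
                   IDs and clears the PCP/DEI ("QoS") bits, while the ethertype
                   bytes are zeroed entirely. */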
                if (task->bit_mask != 0xff) {
                        worker = hash_crc32(&qinq_net, 8, 0) & task->bit_mask;
                }
                else {
                        worker = hash_crc32(&qinq_net, 8, 0) % task->nb_worker_threads;
                }
                plogx_dbg("Sending packet svlan=%x, cvlan=%x, pseudo_qinq=%lx to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq_net, worker);
        } else if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_RSS) {
                // Load Balance on rss of combination of cvlan and svlan
                uint32_t qinq = (packet->qp.qinq_hdr.cvlan.vlan_tci & 0xFF0F) << 16;
                uint32_t rss = toeplitz_hash((uint8_t *)&qinq, 4);
                if (task->bit_mask != 0xff) {
                        worker = rss & task->bit_mask;
                } else {
                        worker = (0x1ff & rss) % task->nb_worker_threads;
                }
                plogx_dbg("Sending packet svlan=%x, cvlan=%x, rss_input=%x, rss=%x to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq, rss, worker);
        } else {
                uint16_t svlan = packet->qp.qinq_hdr.svlan.vlan_tci;
                uint16_t cvlan = packet->qp.qinq_hdr.cvlan.vlan_tci;
                prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
                worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];

                const size_t pos = offsetof(struct cpe_packet, qp.qinq_hdr.cvlan.vlan_tci);
                plogx_dbg("qinq = %u, worker = %u, pos = %lu\n", rte_be_to_cpu_16(cvlan), worker, pos);
        }
        return worker;
}

static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf)
{
        struct cpe_packet *packet = rte_pktmbuf_mtod(mbuf, struct cpe_packet *);
        if (packet->ep.ether_hdr.ether_type == ETYPE_IPv4) {
                if (unlikely((packet->ep.ipv4_hdr.version_ihl >> 4) != 4)) {
                        plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->ep.ipv4_hdr.version_ihl);
                        return OUT_DISCARD;
                }
                /* use 24 bits from the IP, clients are from the 10.0.0.0/8 network */
                const uint32_t tmp = rte_bswap32(packet->ep.ipv4_hdr.src_addr) & 0x00FFFFFF;
                const uint32_t svlan = rte_bswap16(tmp >> 12);
                const uint32_t cvlan = rte_bswap16(tmp & 0x0FFF);
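                /* Example: src 10.4.5.6 -> tmp = 0x040506, giving svlan ID 0x040
                   and cvlan ID 0x506; rte_bswap16() puts them back in network
                   byte order so the lookup index matches the svlan/cvlan values
                   stored when the worker_table was built in init_task_lb_qinq(). */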
                prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
                uint8_t worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];
                return worker + IPV4 * task->nb_worker_threads;
        }
        else if (unlikely(packet->qp.qinq_hdr.svlan.eth_proto != task->qinq_tag)) {
                /* might receive LLDP from the L2 switch... */
                if (packet->qp.qinq_hdr.svlan.eth_proto != ETYPE_LLDP) {
                        plogdx_err(mbuf, "Invalid packet for LB in QinQ mode\n");
                }
                return OUT_DISCARD;
        }

        uint8_t worker = 0;
        uint8_t proto = 0xFF;
        switch (packet->qp.qinq_hdr.ether_type) {
        case ETYPE_IPv4: {
                if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 4)) {
                        plogx_err("Invalid Version %u for ETYPE_IPv4\n", packet->qp.ipv4_hdr.version_ihl);
                        return OUT_DISCARD;
                }
                worker = get_worker(task, packet);
                proto = IPV4;
                break;
        }
        case ETYPE_IPv6: {
                if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 6)) {
                        plogx_err("Invalid Version %u for ETYPE_IPv6\n", packet->qp.ipv4_hdr.version_ihl);
                        return OUT_DISCARD;
                }
                /* Use IP Destination when IPV6 QinQ */
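                /* Byte 61 is the last byte of the IPv6 destination address,
                   assuming a 22-byte QinQ header (two MACs, two VLAN tags, inner
                   ethertype) followed by the 40-byte IPv6 header whose destination
                   occupies bytes 46..61 of the packet. */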
                if (task->bit_mask != 0xff) {
                        worker = ((uint8_t *)packet)[61] & task->bit_mask;
                }
                else {
                        worker = ((uint8_t *)packet)[61] % task->nb_worker_threads;
                }
                proto = IPV6;
                break;
        }
        case ETYPE_ARP: {
                // We can only send to ARP ring if it exists
                if (0 != (task->protocols_mask & (1 << ARP))) {
                        proto = ARP;
                } else {
                        proto = IPV4;
                }
                worker = get_worker(task, packet);
                break;
        }
        default:
                plogx_warn("Error in ETYPE_8021ad: ether_type = %#06x\n", packet->qp.qinq_hdr.ether_type);
                return OUT_DISCARD;
        }

        return worker + proto * task->nb_worker_threads;
}