// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_byteorder.h>
#include <rte_version.h>
#include <rte_hash_crc.h>

#include "prox_malloc.h"
#include "task_base.h"
#include "rx_pkt.h"
#include "etypes.h"
#include "log.h"
#include "quit.h"
#include "qinq.h"
#include "lconf.h"
#include "prefetch.h"
#include "defines.h"
#include "toeplitz.h"
#include "hash_utils.h"
#include "handle_lb_net.h"
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
#define RTE_CACHE_LINE_SIZE CACHE_LINE_SIZE
#endif
/* Load balancing based on one byte: figure out what type of packet
   is passed and, depending on the type, pass the packet to the correct
   worker thread. If an unsupported packet type is received, the packet
   is simply dropped. This load balancer can only handle QinQ packets
   (i.e. packets coming from the vCPE). */
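/* Each worker owns one output per protocol, so the handlers below
   return an out[] index of the form worker + proto * nb_worker_threads;
   OUT_DISCARD drops the packet. */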
int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts);
struct task_lb_qinq {
	struct task_base base;
	uint8_t *worker_table;
	uint8_t bit_mask;
	uint8_t protocols_mask;
	uint8_t nb_worker_threads;
	uint16_t qinq_tag;
};
static void init_task_lb_qinq(struct task_base *tbase, struct task_args *targ)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);

	task->qinq_tag = targ->qinq_tag;
	task->nb_worker_threads = targ->nb_worker_threads;
	task->bit_mask = rte_is_power_of_2(targ->nb_worker_threads) ? targ->nb_worker_threads - 1 : 0xff;
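	/* With a power-of-two worker count the per-packet hash paths can
	   replace the modulo with a mask: e.g. 8 workers gives bit_mask = 7
	   and worker = hash & 7. Otherwise bit_mask stays 0xff and
	   get_worker() below falls back to hash % nb_worker_threads. */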
	/* The load distributor is sending to a set of cores. These
	   cores are responsible for handling a set of flows
	   identified by a QinQ tag. The load distributor identifies
	   the flows and forwards them to the appropriate worker. The
	   mapping from flow to worker is stored within the
	   worker_table. Build the worker_table by asking each worker
	   which flows it handles. */
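	/* The table is indexed by a 24-bit value combining the two 12-bit
	   VLAN IDs, which matches the 0x1000000-byte allocation below. As
	   an illustration only (the actual PKT_TO_LUTQINQ macro is defined
	   in a PROX header and works on network-byte-order TCI fields),
	   such an index could be built as:

	       ((uint32_t)(svlan & 0xFFF) << 12) | (cvlan & 0xFFF)
	*/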
	task->worker_table = prox_zmalloc(0x1000000, socket_id);
	for (int i = 0; i < targ->nb_worker_threads; ++i) {
		struct core_task ct = targ->core_task_set[0].core_task[i];
		struct task_args *t = core_targ_get(ct.core, ct.task);

		PROX_PANIC(t->task_init->flow_iter.beg == NULL,
			   "Load distributor can't find flows owned by destination worker %d\n", i);
		struct flow_iter *it = &t->task_init->flow_iter;

		for (it->beg(it, t); !it->is_end(it, t); it->next(it, t)) {
			uint16_t svlan = it->get_svlan(it, t);
			uint16_t cvlan = it->get_cvlan(it, t);

			task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)] = i;
		}
	}
	/* Check which protocols we are allowed to send to worker tasks */
	for (int i = 0; i < MAX_PROTOCOLS; ++i) {
		int is_active = !!targ->core_task_set[i].n_elems;
		task->protocols_mask |= is_active << i;
	}
	plog_info("\t\ttask_lb_qinq protocols_mask = 0x%x\n", task->protocols_mask);
	if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_RSS)
		tbase->flags |= BASE_FLAG_LUT_QINQ_RSS;
	if (targ->task_init->flag_features & TASK_FEATURE_LUT_QINQ_HASH)
		tbase->flags |= BASE_FLAG_LUT_QINQ_HASH;
	plog_info("\t\ttask_lb_qinq flags = 0x%x\n", tbase->flags);
}
static struct task_init task_init_lb_qinq = {
	.mode_str = "lbqinq",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.size = sizeof(struct task_lb_qinq)
};
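/*
	Usage sketch (illustrative, not taken from this file): a PROX
	configuration binds a core to this task through the mode string,
	along these lines:

		[core 1]
		name=lb
		task=0
		mode=lbqinq
		rx port=inet0
		tx cores=2-5 task=0

	The exact keys and values depend on the deployment; see the PROX
	documentation and the example configs shipped with it.
*/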
/*
	Add the correct port id to mbufs coming from a DPDK ring port in the load balancer.
	For the split-bng using DPDK rings between the vSwitch and the VMs
	we need to know the port from which a packet was received.
	The ring PMD in DPDK does not update the port field in the mbuf
	and thus we have no control over the port numbers that are being used.
	This submode allows the load balancer to set the port number on which it
	received the packet.
*/
static struct task_init task_init_lb_qinq_set_port = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_set_port",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk_set_port,
	.size = sizeof(struct task_lb_qinq)
};
/*
	Load balance on a hash of the combined cvlan and svlan.
*/
static struct task_init task_init_lb_qinq_hash_friend = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_hash_friend",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.flag_features = TASK_FEATURE_LUT_QINQ_HASH,
	.size = sizeof(struct task_lb_qinq)
};
/*
	Load balance on an RSS hash of the combined cvlan and svlan.
	This could be used to compare with HW implementations.
*/
static struct task_init task_init_lb_qinq_rss_friend = {
	.mode_str = "lbqinq",
	.sub_mode_str = "lut_qinq_rss_friend",
	.init = init_task_lb_qinq,
	.handle = handle_lb_qinq_bulk,
	.flag_features = TASK_FEATURE_LUT_QINQ_RSS,
	.size = sizeof(struct task_lb_qinq)
};
__attribute__((constructor)) static void reg_task_lb_qinq(void)
{
	reg_task(&task_init_lb_qinq);
	reg_task(&task_init_lb_qinq_hash_friend);
	reg_task(&task_init_lb_qinq_rss_friend);
	reg_task(&task_init_lb_qinq_set_port);
}
static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf);
int handle_lb_qinq_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t j;

	prefetch_first(mbufs, n_pkts);

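	/* Software pipeline: prefetch the mbuf struct and packet data a few
	   packets ahead, then classify the current packet while later ones
	   are still being pulled into cache. The tail loop handles the last
	   PREFETCH_OFFSET packets, for which nothing is left to prefetch. */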
	for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
		PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
		PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#ifdef PROX_PREFETCH_OFFSET
	PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
	for (; j < n_pkts; ++j) {
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#endif

	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}
int handle_lb_qinq_bulk_set_port(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
{
	struct task_lb_qinq *task = (struct task_lb_qinq *)tbase;
	uint8_t out[MAX_PKT_BURST];
	uint16_t j;
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
	uint32_t port_id = mbufs[0]->pkt.in_port;
#else
	uint32_t port_id = mbufs[0]->port;
#endif

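	/* Recover the port this burst was actually read from. For multi-port
	   receive, last_read_portid has already been advanced past the port
	   that produced this burst, so step back one position modulo the
	   number of RX ports before looking up the port id. */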
	if (tbase->rx_pkt == rx_pkt_hw) {
		port_id = tbase->rx_params_hw.last_read_portid + tbase->rx_params_hw.nb_rxports;
		port_id = (port_id - 1) % tbase->rx_params_hw.nb_rxports;
		port_id = tbase->rx_params_hw.rx_pq[port_id].port;
	}
	else if (tbase->rx_pkt == rx_pkt_hw1) {
		port_id = tbase->rx_params_hw1.rx_pq.port;
	}

	prefetch_first(mbufs, n_pkts);

	for (j = 0; j + PREFETCH_OFFSET < n_pkts; ++j) {
#ifdef PROX_PREFETCH_OFFSET
		PREFETCH0(mbufs[j + PREFETCH_OFFSET]);
		PREFETCH0(rte_pktmbuf_mtod(mbufs[j + PREFETCH_OFFSET - 1], void *));
#endif
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
		mbufs[j]->pkt.in_port = port_id;
#else
		mbufs[j]->port = port_id;
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#ifdef PROX_PREFETCH_OFFSET
	PREFETCH0(rte_pktmbuf_mtod(mbufs[n_pkts - 1], void *));
	for (; j < n_pkts; ++j) {
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
		mbufs[j]->pkt.in_port = port_id;
#else
		mbufs[j]->port = port_id;
#endif
		out[j] = handle_lb_qinq(task, mbufs[j]);
	}
#endif

	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
}
struct qinq_packet {
	struct qinq_hdr qinq_hdr;
	union {
		struct ipv4_hdr ipv4_hdr;
		struct ipv6_hdr ipv6_hdr;
	};
} __attribute__((packed));

struct qinq_packet_data {
	struct ether_addr d_addr;
	struct ether_addr s_addr;
	uint64_t qinq; /* Both VLAN tags, read as a single 64-bit word for hashing */
} __attribute__((packed));

struct ether_packet {
	struct ether_hdr ether_hdr;
	union {
		struct ipv4_hdr ipv4_hdr;
		struct ipv6_hdr ipv6_hdr;
	};
} __attribute__((packed));

struct cpe_packet {
	union {
		struct qinq_packet qp;
		struct ether_packet ep;
		struct qinq_packet_data qd;
	};
};
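/* Note on the 0xFF0F masks used below: the TCI fields are big-endian on
   the wire, so when a TCI is read as a 16-bit value on a little-endian
   host, the 12-bit VLAN ID occupies bits 0-3 and 8-15 while the PCP/DEI
   bits occupy bits 4-7; masking with 0xFF0F therefore keeps only the
   VLAN ID. Likewise, 0xFF0F0000FF0F0000 applied to qd.qinq clears both
   TPIDs and both PCP/DEI fields, keeping the two 12-bit VLAN IDs. */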
static inline uint8_t get_worker(struct task_lb_qinq *task, struct cpe_packet *packet)
{
	uint8_t worker = 0;
	if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_HASH) {
		// Load balance on a CRC hash of the combined cvlan and svlan
		uint64_t qinq_net = packet->qd.qinq;
		qinq_net = qinq_net & 0xFF0F0000FF0F0000; // Mask out the TPID and QoS bits, keeping both VLAN IDs
		if (task->bit_mask != 0xff) {
			worker = rte_hash_crc(&qinq_net, 8, 0) & task->bit_mask;
		}
		else {
			worker = rte_hash_crc(&qinq_net, 8, 0) % task->nb_worker_threads;
		}
		plogx_dbg("Sending packet svlan=%x, cvlan=%x, pseudo_qinq=%lx to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq_net, worker);
	} else if (((struct task_base *)task)->flags & BASE_FLAG_LUT_QINQ_RSS) {
		// Load balance on a Toeplitz (RSS) hash of the masked cvlan TCI
		uint32_t qinq = (packet->qp.qinq_hdr.cvlan.vlan_tci & 0xFF0F) << 16;
		uint32_t rss = toeplitz_hash((uint8_t *)&qinq, 4);
		if (task->bit_mask != 0xff) {
			worker = rss & task->bit_mask;
		} else {
			worker = (0x1ff & rss) % task->nb_worker_threads;
		}
		plogx_dbg("Sending packet svlan=%x, cvlan=%x, rss_input=%x, rss=%x to worker %d\n", rte_bswap16(0xFF0F & packet->qp.qinq_hdr.svlan.vlan_tci), rte_bswap16(0xFF0F & packet->qp.qinq_hdr.cvlan.vlan_tci), qinq, rss, worker);
	} else {
		// Default: look the worker up in the table built from the workers' flows at init time
		uint16_t svlan = packet->qp.qinq_hdr.svlan.vlan_tci;
		uint16_t cvlan = packet->qp.qinq_hdr.cvlan.vlan_tci;
		prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
		worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];

		const size_t pos = offsetof(struct cpe_packet, qp.qinq_hdr.cvlan.vlan_tci);
		plogx_dbg("qinq = %u, worker = %u, pos = %lu\n", rte_be_to_cpu_16(cvlan), worker, pos);
	}
	return worker;
}
static inline uint8_t handle_lb_qinq(struct task_lb_qinq *task, struct rte_mbuf *mbuf)
{
	struct cpe_packet *packet = rte_pktmbuf_mtod(mbuf, struct cpe_packet *);
	if (packet->ep.ether_hdr.ether_type == ETYPE_IPv4) {
		if (unlikely((packet->ep.ipv4_hdr.version_ihl >> 4) != 4)) {
			plogx_err("Invalid version %u for ETYPE_IPv4\n", packet->ep.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		/* Use 24 bits from the IP; clients are in the 10.0.0.0/8 network */
		const uint32_t tmp = rte_bswap32(packet->ep.ipv4_hdr.src_addr) & 0x00FFFFFF;
		const uint32_t svlan = rte_bswap16(tmp >> 12);
		const uint32_t cvlan = rte_bswap16(tmp & 0x0FFF);
		prefetch_nta(&task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)]);
		uint8_t worker = task->worker_table[PKT_TO_LUTQINQ(svlan, cvlan)];
		return worker + IPV4 * task->nb_worker_threads;
	}
	else if (unlikely(packet->qp.qinq_hdr.svlan.eth_proto != task->qinq_tag)) {
		/* Might receive LLDP from the L2 switch... */
		if (packet->qp.qinq_hdr.svlan.eth_proto != ETYPE_LLDP) {
			plogdx_err(mbuf, "Invalid packet for LB in QinQ mode\n");
		}
		return OUT_DISCARD;
	}
	uint8_t worker = 0;
	uint8_t proto = 0xFF;
	switch (packet->qp.qinq_hdr.ether_type) {
	case ETYPE_IPv4: {
		if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 4)) {
			plogx_err("Invalid version %u for ETYPE_IPv4\n", packet->qp.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		worker = get_worker(task, packet);
		proto = IPV4;
		break;
	}
	case ETYPE_IPv6: {
		/* The version nibble shares the first byte of the union, so the
		   ipv4_hdr view is used to read it */
		if (unlikely((packet->qp.ipv4_hdr.version_ihl >> 4) != 6)) {
			plogx_err("Invalid version %u for ETYPE_IPv6\n", packet->qp.ipv4_hdr.version_ihl);
			return OUT_DISCARD;
		}
		/* Use the IP destination when load balancing IPv6 QinQ: byte 61
		   is the last byte of the IPv6 destination address */
		if (task->bit_mask != 0xff) {
			worker = ((uint8_t *)packet)[61] & task->bit_mask;
		}
		else {
			worker = ((uint8_t *)packet)[61] % task->nb_worker_threads;
		}
		proto = IPV6;
		break;
	}
	case ETYPE_ARP: {
		// We can only send to the ARP ring if it exists
		if (0 != (task->protocols_mask & (1 << ARP))) {
			proto = ARP;
		} else {
			proto = IPV4;
		}
		worker = get_worker(task, packet);
		break;
	}
	default:
		plogx_warn("Error in ETYPE_8021ad: ether_type = %#06x\n", packet->qp.qinq_hdr.ether_type);
		return OUT_DISCARD;
	}

	return worker + proto * task->nb_worker_threads;
}