These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
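Most of the churn below is mechanical: the driver stops reaching into the old union members of struct ib_send_wr (wr->wr.ud.*, wr->wr.atomic.*, wr->wr.fast_reg.*) and instead uses the typed work-request structs from the ib_send_wr split, recovered through helpers such as ud_wr(), rdma_wr(), atomic_wr() and reg_wr(). A minimal sketch of that pattern, with the field list abridged and assumed to mirror include/rdma/ib_verbs.h in this tree:

/*
 * Sketch only (assumes the post-split definitions in
 * include/rdma/ib_verbs.h): each typed WR embeds the generic
 * struct ib_send_wr, and the helper uses container_of()
 * (<linux/kernel.h>) to recover the wrapper from the generic
 * pointer handed to the post_send path.
 */
struct ib_ud_wr {
	struct ib_send_wr	wr;		/* generic send WR, embedded */
	struct ib_ah		*ah;
	void			*header;	/* LSO header for IB_WR_LSO */
	int			hlen;
	int			mss;
	u32			remote_qpn;
	u32			remote_qkey;
	u16			pkey_index;
	u8			port_num;
};

static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
{
	return container_of(wr, struct ib_ud_wr, wr);
}

So build_sriov_qp0_header(), set_datagram_seg() and friends now take a struct ib_ud_wr * and read fields such as remote_qpn directly, while the generic fields (opcode, num_sge, sg_list, send_flags) move behind wr->wr.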
diff --git a/kernel/drivers/infiniband/hw/mlx4/qp.c b/kernel/drivers/infiniband/hw/mlx4/qp.c
index 02fc91c..13eaaf4 100644
--- a/kernel/drivers/infiniband/hw/mlx4/qp.c
+++ b/kernel/drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
+#include <linux/vmalloc.h>
 
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
@@ -111,7 +112,7 @@ static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_ATOMIC_FETCH_AND_ADD]            = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
        [IB_WR_SEND_WITH_INV]                   = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
        [IB_WR_LOCAL_INV]                       = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
-       [IB_WR_FAST_REG_MR]                     = cpu_to_be32(MLX4_OPCODE_FMR),
+       [IB_WR_REG_MR]                          = cpu_to_be32(MLX4_OPCODE_FMR),
        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
        [IB_WR_BIND_MW]                         = cpu_to_be32(MLX4_OPCODE_BIND_MW),
@@ -617,6 +618,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
        return 0;
 }
 
+static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
+                                   struct mlx4_ib_qp *qp)
+{
+       mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+       mlx4_counter_free(dev->dev, qp->counter_index->index);
+       list_del(&qp->counter_index->list);
+       mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+       kfree(qp->counter_index);
+       qp->counter_index = NULL;
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
@@ -746,9 +759,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        } else {
                qp->sq_no_prefetch = 0;
 
-               if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
-                       qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
-
                if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
                        qp->flags |= MLX4_IB_QP_LSO;
 
@@ -786,8 +796,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        goto err_mtt;
 
-               qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-               qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+               qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+               if (!qp->sq.wrid)
+                       qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+                                               gfp, PAGE_KERNEL);
+               qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+               if (!qp->rq.wrid)
+                       qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+                                               gfp, PAGE_KERNEL);
                if (!qp->sq.wrid || !qp->rq.wrid) {
                        err = -ENOMEM;
                        goto err_wrid;
@@ -822,6 +838,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        goto err_proxy;
        }
 
+       if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+               qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
        err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
        if (err)
                goto err_qpn;
@@ -874,8 +893,8 @@ err_wrid:
                if (qp_has_rq(init_attr))
                        mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
        } else {
-               kfree(qp->sq.wrid);
-               kfree(qp->rq.wrid);
+               kvfree(qp->sq.wrid);
+               kvfree(qp->rq.wrid);
        }
 
 err_mtt:
@@ -1050,8 +1069,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
                                              &qp->db);
                ib_umem_release(qp->umem);
        } else {
-               kfree(qp->sq.wrid);
-               kfree(qp->rq.wrid);
+               kvfree(qp->sq.wrid);
+               kvfree(qp->rq.wrid);
                if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
                    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
                        free_proxy_bufs(&dev->ib_dev, qp);
@@ -1086,6 +1105,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 {
        struct mlx4_ib_qp *qp = NULL;
        int err;
+       int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
        u16 xrcdn = 0;
        gfp_t gfp;
 
@@ -1109,8 +1129,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        }
 
        if (init_attr->create_flags &&
-           (udata ||
-            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
+           ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
+            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+                                          MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+                                          MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
              init_attr->qp_type != IB_QPT_UD) ||
             ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
              init_attr->qp_type > IB_QPT_GSI)))
@@ -1189,6 +1211,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
                mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
        }
 
+       if (mqp->counter_index)
+               mlx4_ib_free_qp_counter(dev, mqp);
+
        pd = get_pd(mqp);
        destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
 
@@ -1292,14 +1317,18 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
                path->static_rate = 0;
 
        if (ah->ah_flags & IB_AH_GRH) {
-               if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
+               int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev,
+                                                                     port,
+                                                                     ah->grh.sgid_index);
+
+               if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
                        pr_err("sgid_index (%u) too large. max is %d\n",
-                              ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
+                              real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
                        return -1;
                }
 
                path->grh_mylmc |= 1 << 7;
-               path->mgid_index = ah->grh.sgid_index;
+               path->mgid_index = real_sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
                path->tclass_flowlabel =
                        cpu_to_be32((ah->grh.traffic_class << 20) |
@@ -1387,11 +1416,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
 static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
                         enum ib_qp_attr_mask qp_attr_mask,
                         struct mlx4_ib_qp *mqp,
-                        struct mlx4_qp_path *path, u8 port)
+                        struct mlx4_qp_path *path, u8 port,
+                        u16 vlan_id, u8 *smac)
 {
        return _mlx4_set_path(dev, &qp->ah_attr,
-                             mlx4_mac_to_u64((u8 *)qp->smac),
-                             (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+                             mlx4_mac_to_u64(smac),
+                             vlan_id,
                              path, &mqp->pri, port);
 }
 
@@ -1402,9 +1432,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
                             struct mlx4_qp_path *path, u8 port)
 {
        return _mlx4_set_path(dev, &qp->alt_ah_attr,
-                             mlx4_mac_to_u64((u8 *)qp->alt_smac),
-                             (qp_attr_mask & IB_QP_ALT_VID) ?
-                             qp->alt_vlan_id : 0xffff,
+                             0,
+                             0xffff,
                              path, &mqp->alt, port);
 }
 
@@ -1420,7 +1449,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
        }
 }
 
-static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
+                                   struct mlx4_ib_qp *qp,
                                    struct mlx4_qp_context *context)
 {
        u64 u64_mac;
@@ -1443,6 +1473,40 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *
        return 0;
 }
 
+static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+       struct counter_index *new_counter_index;
+       int err;
+       u32 tmp_idx;
+
+       if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) !=
+           IB_LINK_LAYER_ETHERNET ||
+           !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) ||
+           !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
+               return 0;
+
+       err = mlx4_counter_alloc(dev->dev, &tmp_idx);
+       if (err)
+               return err;
+
+       new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL);
+       if (!new_counter_index) {
+               mlx4_counter_free(dev->dev, tmp_idx);
+               return -ENOMEM;
+       }
+
+       new_counter_index->index = tmp_idx;
+       new_counter_index->allocated = 1;
+       qp->counter_index = new_counter_index;
+
+       mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+       list_add_tail(&new_counter_index->list,
+                     &dev->counters_table[qp->port - 1].counters_list);
+       mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+       return 0;
+}
+
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1456,6 +1520,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        int sqd_event;
        int steer_qp = 0;
        int err = -EINVAL;
+       int counter_index;
 
        /* APM is not supported under RoCE */
        if (attr_mask & IB_QP_ALT_PATH &&
@@ -1515,6 +1580,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
        context->sq_size_stride |= qp->sq.wqe_shift - 4;
 
+       if (new_state == IB_QPS_RESET && qp->counter_index)
+               mlx4_ib_free_qp_counter(dev, qp);
+
        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
                context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
                context->xrcd = cpu_to_be32((u32) qp->xrcdn);
@@ -1539,12 +1607,27 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        }
 
        if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
-               if (dev->counters[qp->port - 1] != -1) {
-                       context->pri_path.counter_index =
-                                               dev->counters[qp->port - 1];
+               err = create_qp_lb_counter(dev, qp);
+               if (err)
+                       goto out;
+
+               counter_index =
+                       dev->counters_table[qp->port - 1].default_counter;
+               if (qp->counter_index)
+                       counter_index = qp->counter_index->index;
+
+               if (counter_index != -1) {
+                       context->pri_path.counter_index = counter_index;
                        optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+                       if (qp->counter_index) {
+                               context->pri_path.fl |=
+                                       MLX4_FL_ETH_SRC_CHECK_MC_LB;
+                               context->pri_path.vlan_control |=
+                                       MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+                       }
                } else
-                       context->pri_path.counter_index = 0xff;
+                       context->pri_path.counter_index =
+                               MLX4_SINK_COUNTER_INDEX(dev->dev);
 
                if (qp->flags & MLX4_IB_QP_NETIF) {
                        mlx4_ib_steer_qp_reg(dev, qp, 1);
@@ -1560,9 +1643,33 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        }
 
        if (attr_mask & IB_QP_AV) {
+               u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
+                       attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+               union ib_gid gid;
+               struct ib_gid_attr gid_attr;
+               u16 vlan = 0xffff;
+               u8 smac[ETH_ALEN];
+               int status = 0;
+
+               if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+                   attr->ah_attr.ah_flags & IB_AH_GRH) {
+                       int index = attr->ah_attr.grh.sgid_index;
+
+                       status = ib_get_cached_gid(ibqp->device, port_num,
+                                                  index, &gid, &gid_attr);
+                       if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
+                               status = -ENOENT;
+                       if (!status && gid_attr.ndev) {
+                               vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+                               memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
+                               dev_put(gid_attr.ndev);
+                       }
+               }
+               if (status)
+                       goto out;
+
                if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
-                                 attr_mask & IB_QP_PORT ?
-                                 attr->port_num : qp->port))
+                                 port_num, vlan, smac))
                        goto out;
 
                optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1699,7 +1806,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                        if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
                            qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
                            qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
-                               err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
+                               err = handle_eth_ud_smac_index(dev, qp, context);
                                if (err) {
                                        err = -EINVAL;
                                        goto out;
@@ -1843,6 +1950,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                }
        }
 out:
+       if (err && qp->counter_index)
+               mlx4_ib_free_qp_counter(dev, qp);
        if (err && steer_qp)
                mlx4_ib_steer_qp_reg(dev, qp, 0);
        kfree(context);
@@ -2031,14 +2140,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 }
 
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
-                                 struct ib_send_wr *wr,
+                                 struct ib_ud_wr *wr,
                                  void *wqe, unsigned *mlx_seg_len)
 {
        struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
        struct ib_device *ib_dev = &mdev->ib_dev;
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
-       struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+       struct mlx4_ib_ah *ah = to_mah(wr->ah);
        u16 pkey;
        u32 qkey;
        int send_size;
@@ -2046,13 +2155,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        int spc;
        int i;
 
-       if (wr->opcode != IB_WR_SEND)
+       if (wr->wr.opcode != IB_WR_SEND)
                return -EINVAL;
 
        send_size = 0;
 
-       for (i = 0; i < wr->num_sge; ++i)
-               send_size += wr->sg_list[i].length;
+       for (i = 0; i < wr->wr.num_sge; ++i)
+               send_size += wr->wr.sg_list[i].length;
 
        /* for proxy-qp0 sends, need to add in size of tunnel header */
        /* for tunnel-qp0 sends, tunnel header is already in s/g list */
@@ -2077,11 +2186,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        mlx->rlid = sqp->ud_header.lrh.destination_lid;
 
        sqp->ud_header.lrh.virtual_lane    = 0;
-       sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+       sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
        ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
-               sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+               sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        else
                sqp->ud_header.bth.destination_qpn =
                        cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
@@ -2153,14 +2262,14 @@ static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
        }
 }
 
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
 {
        struct ib_device *ib_dev = sqp->qp.ibqp.device;
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_ctrl_seg *ctrl = wqe;
        struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
-       struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+       struct mlx4_ib_ah *ah = to_mah(wr->ah);
        union ib_gid sgid;
        u16 pkey;
        int send_size;
@@ -2174,8 +2283,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
        bool is_grh;
 
        send_size = 0;
-       for (i = 0; i < wr->num_sge; ++i)
-               send_size += wr->sg_list[i].length;
+       for (i = 0; i < wr->wr.num_sge; ++i)
+               send_size += wr->wr.sg_list[i].length;
 
        is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
        is_grh = mlx4_ib_ah_grh_present(ah);
@@ -2192,7 +2301,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                } else  {
                        err = ib_get_cached_gid(ib_dev,
                                                be32_to_cpu(ah->av.ib.port_pd) >> 24,
-                                               ah->av.ib.gid_index, &sgid);
+                                               ah->av.ib.gid_index, &sgid,
+                                               NULL);
+                       if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
+                               err = -ENOENT;
                        if (err)
                                return err;
                }
@@ -2234,7 +2346,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                        ib_get_cached_gid(ib_dev,
                                          be32_to_cpu(ah->av.ib.port_pd) >> 24,
                                          ah->av.ib.gid_index,
-                                         &sqp->ud_header.grh.source_gid);
+                                         &sqp->ud_header.grh.source_gid, NULL);
                }
                memcpy(sqp->ud_header.grh.destination_gid.raw,
                       ah->av.ib.dgid, 16);
@@ -2252,7 +2364,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                mlx->rlid = sqp->ud_header.lrh.destination_lid;
        }
 
-       switch (wr->opcode) {
+       switch (wr->wr.opcode) {
        case IB_WR_SEND:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
                sqp->ud_header.immediate_present = 0;
@@ -2260,7 +2372,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data    = wr->ex.imm_data;
+               sqp->ud_header.immediate_data    = wr->wr.ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -2303,16 +2415,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                        sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        }
-       sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+       sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
        if (!sqp->qp.ibqp.qp_num)
                ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
        else
-               ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
+               ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
-       sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+       sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-       sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
-                                              sqp->qkey : wr->wr.ud.remote_qkey);
+       sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
+                                              sqp->qkey : wr->remote_qkey);
        sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
 
        header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -2400,43 +2512,39 @@ static __be32 convert_access(int acc)
                cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
 }
 
-static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
+                       struct ib_reg_wr *wr)
 {
-       struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
-       int i;
-
-       for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
-               mfrpl->mapped_page_list[i] =
-                       cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
-                                   MLX4_MTT_FLAG_PRESENT);
+       struct mlx4_ib_mr *mr = to_mmr(wr->mr);
 
-       fseg->flags             = convert_access(wr->wr.fast_reg.access_flags);
-       fseg->mem_key           = cpu_to_be32(wr->wr.fast_reg.rkey);
-       fseg->buf_list          = cpu_to_be64(mfrpl->map);
-       fseg->start_addr        = cpu_to_be64(wr->wr.fast_reg.iova_start);
-       fseg->reg_len           = cpu_to_be64(wr->wr.fast_reg.length);
+       fseg->flags             = convert_access(wr->access);
+       fseg->mem_key           = cpu_to_be32(wr->key);
+       fseg->buf_list          = cpu_to_be64(mr->page_map);
+       fseg->start_addr        = cpu_to_be64(mr->ibmr.iova);
+       fseg->reg_len           = cpu_to_be64(mr->ibmr.length);
        fseg->offset            = 0; /* XXX -- is this just for ZBVA? */
-       fseg->page_size         = cpu_to_be32(wr->wr.fast_reg.page_shift);
+       fseg->page_size         = cpu_to_be32(ilog2(mr->ibmr.page_size));
        fseg->reserved[0]       = 0;
        fseg->reserved[1]       = 0;
 }
 
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
+static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
+               struct ib_bind_mw_wr *wr)
 {
        bseg->flags1 =
-               convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
+               convert_access(wr->bind_info.mw_access_flags) &
                cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ  |
                            MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
                            MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
        bseg->flags2 = 0;
-       if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
+       if (wr->mw->type == IB_MW_TYPE_2)
                bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
-       if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
+       if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
                bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
-       bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
-       bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
-       bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
-       bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
+       bseg->new_rkey = cpu_to_be32(wr->rkey);
+       bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
+       bseg->addr = cpu_to_be64(wr->bind_info.addr);
+       bseg->length = cpu_to_be64(wr->bind_info.length);
 }
 
 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
@@ -2453,46 +2561,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
        rseg->reserved = 0;
 }
 
-static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
+               struct ib_atomic_wr *wr)
 {
-       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
-               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
-       } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
-               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+       if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+               aseg->swap_add = cpu_to_be64(wr->swap);
+               aseg->compare  = cpu_to_be64(wr->compare_add);
+       } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+               aseg->swap_add = cpu_to_be64(wr->compare_add);
+               aseg->compare  = cpu_to_be64(wr->compare_add_mask);
        } else {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+               aseg->swap_add = cpu_to_be64(wr->compare_add);
                aseg->compare  = 0;
        }
 
 }
 
 static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
-                                 struct ib_send_wr *wr)
+                                 struct ib_atomic_wr *wr)
 {
-       aseg->swap_add          = cpu_to_be64(wr->wr.atomic.swap);
-       aseg->swap_add_mask     = cpu_to_be64(wr->wr.atomic.swap_mask);
-       aseg->compare           = cpu_to_be64(wr->wr.atomic.compare_add);
-       aseg->compare_mask      = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+       aseg->swap_add          = cpu_to_be64(wr->swap);
+       aseg->swap_add_mask     = cpu_to_be64(wr->swap_mask);
+       aseg->compare           = cpu_to_be64(wr->compare_add);
+       aseg->compare_mask      = cpu_to_be64(wr->compare_add_mask);
 }
 
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
-                            struct ib_send_wr *wr)
+                            struct ib_ud_wr *wr)
 {
-       memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
-       dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
-       dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
-       dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
-       memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
+       memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
+       dseg->dqpn = cpu_to_be32(wr->remote_qpn);
+       dseg->qkey = cpu_to_be32(wr->remote_qkey);
+       dseg->vlan = to_mah(wr->ah)->av.eth.vlan;
+       memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6);
 }
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
                                    struct mlx4_wqe_datagram_seg *dseg,
-                                   struct ib_send_wr *wr,
+                                   struct ib_ud_wr *wr,
                                    enum mlx4_ib_qp_type qpt)
 {
-       union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
+       union mlx4_ext_av *av = &to_mah(wr->ah)->av;
        struct mlx4_av sqp_av = {0};
        int port = *((u8 *) &av->ib.port_pd) & 0x3;
 
@@ -2511,18 +2620,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
        dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
 
-static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
+static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
 {
        struct mlx4_wqe_inline_seg *inl = wqe;
        struct mlx4_ib_tunnel_header hdr;
-       struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+       struct mlx4_ib_ah *ah = to_mah(wr->ah);
        int spc;
        int i;
 
        memcpy(&hdr.av, &ah->av, sizeof hdr.av);
-       hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
-       hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
-       hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+       hdr.remote_qpn = cpu_to_be32(wr->remote_qpn);
+       hdr.pkey_index = cpu_to_be16(wr->pkey_index);
+       hdr.qkey = cpu_to_be32(wr->remote_qkey);
        memcpy(hdr.mac, ah->av.eth.mac, 6);
        hdr.vlan = ah->av.eth.vlan;
 
@@ -2594,22 +2703,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
        dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
                         struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
                         __be32 *lso_hdr_sz, __be32 *blh)
 {
-       unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
+       unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
 
        if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
                *blh = cpu_to_be32(1 << 6);
 
        if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
-                    wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+                    wr->wr.num_sge > qp->sq.max_gs - (halign >> 4)))
                return -EINVAL;
 
-       memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
+       memcpy(wqe->header, wr->header, wr->hlen);
 
-       *lso_hdr_sz  = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
+       *lso_hdr_sz  = cpu_to_be32(wr->mss << 16 | wr->hlen);
        *lso_seg_len = halign;
        return 0;
 }
@@ -2708,11 +2817,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                        case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
-                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
+                               set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+                                             atomic_wr(wr)->rkey);
                                wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 
-                               set_atomic_seg(wqe, wr);
+                               set_atomic_seg(wqe, atomic_wr(wr));
                                wqe  += sizeof (struct mlx4_wqe_atomic_seg);
 
                                size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2721,11 +2830,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                break;
 
                        case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
-                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
+                               set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+                                             atomic_wr(wr)->rkey);
                                wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 
-                               set_masked_atomic_seg(wqe, wr);
+                               set_masked_atomic_seg(wqe, atomic_wr(wr));
                                wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);
 
                                size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2736,8 +2845,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                wqe  += sizeof (struct mlx4_wqe_raddr_seg);
                                size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
                                break;
@@ -2750,18 +2859,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
                                break;
 
-                       case IB_WR_FAST_REG_MR:
+                       case IB_WR_REG_MR:
                                ctrl->srcrb_flags |=
                                        cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-                               set_fmr_seg(wqe, wr);
-                               wqe  += sizeof (struct mlx4_wqe_fmr_seg);
-                               size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+                               set_reg_seg(wqe, reg_wr(wr));
+                               wqe  += sizeof(struct mlx4_wqe_fmr_seg);
+                               size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
                                break;
 
                        case IB_WR_BIND_MW:
                                ctrl->srcrb_flags |=
                                        cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-                               set_bind_seg(wqe, wr);
+                               set_bind_seg(wqe, bind_mw_wr(wr));
                                wqe  += sizeof(struct mlx4_wqe_bind_seg);
                                size += sizeof(struct mlx4_wqe_bind_seg) / 16;
                                break;
@@ -2772,7 +2881,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case MLX4_IB_QPT_TUN_SMI_OWNER:
-                       err =  build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+                       err =  build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+                                       ctrl, &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
@@ -2783,19 +2893,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                case MLX4_IB_QPT_TUN_SMI:
                case MLX4_IB_QPT_TUN_GSI:
                        /* this is a UD qp used in MAD responses to slaves. */
-                       set_datagram_seg(wqe, wr);
+                       set_datagram_seg(wqe, ud_wr(wr));
                        /* set the forced-loopback bit in the data seg av */
                        *(__be32 *) wqe |= cpu_to_be32(0x80000000);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
                        break;
                case MLX4_IB_QPT_UD:
-                       set_datagram_seg(wqe, wr);
+                       set_datagram_seg(wqe, ud_wr(wr));
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
                        if (wr->opcode == IB_WR_LSO) {
-                               err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
+                               err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
+                                               &lso_hdr_sz, &blh);
                                if (unlikely(err)) {
                                        *bad_wr = wr;
                                        goto out;
@@ -2807,7 +2918,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case MLX4_IB_QPT_PROXY_SMI_OWNER:
-                       err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+                       err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+                                       ctrl, &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
@@ -2818,7 +2930,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        add_zero_len_inline(wqe);
                        wqe += 16;
                        size++;
-                       build_tunnel_header(wr, wqe, &seglen);
+                       build_tunnel_header(ud_wr(wr), wqe, &seglen);
                        wqe  += seglen;
                        size += seglen / 16;
                        break;
@@ -2828,18 +2940,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         * In this case we first add a UD segment targeting
                         * the tunnel qp, and then add a header with address
                         * information */
-                       set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+                       set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
+                                               ud_wr(wr),
                                                qp->mlx4_ib_qp_type);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
-                       build_tunnel_header(wr, wqe, &seglen);
+                       build_tunnel_header(ud_wr(wr), wqe, &seglen);
                        wqe  += seglen;
                        size += seglen / 16;
                        break;
 
                case MLX4_IB_QPT_SMI:
                case MLX4_IB_QPT_GSI:
-                       err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+                       err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
+                                       &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;