These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] kernel/drivers/infiniband/hw/mlx4/main.c
index cc64400..97d6878 100644
@@ -45,6 +45,9 @@
 #include <rdma/ib_smi.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include <net/bonding.h>
 
 #include <linux/mlx4/driver.h>
 #include <linux/mlx4/cmd.h>
@@ -74,13 +77,6 @@ static const char mlx4_ib_version[] =
        DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
        DRV_VERSION " (" DRV_RELDATE ")\n";
 
-struct update_gid_work {
-       struct work_struct      work;
-       union ib_gid            gids[128];
-       struct mlx4_ib_dev     *dev;
-       int                     port;
-};
-
 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
 
 static struct workqueue_struct *wq;
@@ -93,8 +89,6 @@ static void init_query_mad(struct ib_smp *mad)
        mad->method        = IB_MGMT_METHOD_GET;
 }
 
-static union ib_gid zgid;
-
 static int check_flow_steering_support(struct mlx4_dev *dev)
 {
        int eth_num_ports = 0;
@@ -131,15 +125,267 @@ static int num_ib_ports(struct mlx4_dev *dev)
        return ib_ports;
 }
 
+static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
+{
+       struct mlx4_ib_dev *ibdev = to_mdev(device);
+       struct net_device *dev;
+
+       rcu_read_lock();
+       dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
+
+       if (dev) {
+               if (mlx4_is_bonded(ibdev->dev)) {
+                       struct net_device *upper = NULL;
+
+                       upper = netdev_master_upper_dev_get_rcu(dev);
+                       if (upper) {
+                               struct net_device *active;
+
+                               active = bond_option_active_slave_get_rcu(netdev_priv(upper));
+                               if (active)
+                                       dev = active;
+                       }
+               }
+       }
+       if (dev)
+               dev_hold(dev);
+
+       rcu_read_unlock();
+       return dev;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+                              struct mlx4_ib_dev *ibdev,
+                              u8 port_num)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       int err;
+       struct mlx4_dev *dev = ibdev->dev;
+       int i;
+       union ib_gid *gid_tbl;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev);
+       if (IS_ERR(mailbox))
+               return -ENOMEM;
+
+       gid_tbl = mailbox->buf;
+
+       for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
+               memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
+
+       err = mlx4_cmd(dev, mailbox->dma,
+                      MLX4_SET_PORT_GID_TABLE << 8 | port_num,
+                      1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+                      MLX4_CMD_WRAPPED);
+       if (mlx4_is_bonded(dev))
+               err += mlx4_cmd(dev, mailbox->dma,
+                               MLX4_SET_PORT_GID_TABLE << 8 | 2,
+                               1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+                               MLX4_CMD_WRAPPED);
+
+       mlx4_free_cmd_mailbox(dev, mailbox);
+       return err;
+}
+
+static int mlx4_ib_add_gid(struct ib_device *device,
+                          u8 port_num,
+                          unsigned int index,
+                          const union ib_gid *gid,
+                          const struct ib_gid_attr *attr,
+                          void **context)
+{
+       struct mlx4_ib_dev *ibdev = to_mdev(device);
+       struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+       struct mlx4_port_gid_table   *port_gid_table;
+       int free = -1, found = -1;
+       int ret = 0;
+       int hw_update = 0;
+       int i;
+       struct gid_entry *gids = NULL;
+
+       if (!rdma_cap_roce_gid_table(device, port_num))
+               return -EINVAL;
+
+       if (port_num > MLX4_MAX_PORTS)
+               return -EINVAL;
+
+       if (!context)
+               return -EINVAL;
+
+       port_gid_table = &iboe->gids[port_num - 1];
+       spin_lock_bh(&iboe->lock);
+       for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+               if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+                       found = i;
+                       break;
+               }
+               if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
+                       free = i; /* HW has space */
+       }
+
+       if (found < 0) {
+               if (free < 0) {
+                       ret = -ENOSPC;
+               } else {
+                       port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
+                       if (!port_gid_table->gids[free].ctx) {
+                               ret = -ENOMEM;
+                       } else {
+                               *context = port_gid_table->gids[free].ctx;
+                               memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+                               port_gid_table->gids[free].ctx->real_index = free;
+                               port_gid_table->gids[free].ctx->refcount = 1;
+                               hw_update = 1;
+                       }
+               }
+       } else {
+               struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
+               *context = ctx;
+               ctx->refcount++;
+       }
+       if (!ret && hw_update) {
+               gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+               if (!gids) {
+                       ret = -ENOMEM;
+               } else {
+                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+                               memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+               }
+       }
+       spin_unlock_bh(&iboe->lock);
+
+       if (!ret && hw_update) {
+               ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+               kfree(gids);
+       }
+
+       return ret;
+}
+
+static int mlx4_ib_del_gid(struct ib_device *device,
+                          u8 port_num,
+                          unsigned int index,
+                          void **context)
+{
+       struct gid_cache_context *ctx = *context;
+       struct mlx4_ib_dev *ibdev = to_mdev(device);
+       struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+       struct mlx4_port_gid_table   *port_gid_table;
+       int ret = 0;
+       int hw_update = 0;
+       struct gid_entry *gids = NULL;
+
+       if (!rdma_cap_roce_gid_table(device, port_num))
+               return -EINVAL;
+
+       if (port_num > MLX4_MAX_PORTS)
+               return -EINVAL;
+
+       port_gid_table = &iboe->gids[port_num - 1];
+       spin_lock_bh(&iboe->lock);
+       if (ctx) {
+               ctx->refcount--;
+               if (!ctx->refcount) {
+                       unsigned int real_index = ctx->real_index;
+
+                       memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
+                       kfree(port_gid_table->gids[real_index].ctx);
+                       port_gid_table->gids[real_index].ctx = NULL;
+                       hw_update = 1;
+               }
+       }
+       if (!ret && hw_update) {
+               int i;
+
+               gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+               if (!gids) {
+                       ret = -ENOMEM;
+               } else {
+                       for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+                               memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+               }
+       }
+       spin_unlock_bh(&iboe->lock);
+
+       if (!ret && hw_update) {
+               ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+               kfree(gids);
+       }
+       return ret;
+}
+
+int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
+                                   u8 port_num, int index)
+{
+       struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+       struct gid_cache_context *ctx = NULL;
+       union ib_gid gid;
+       struct mlx4_port_gid_table   *port_gid_table;
+       int real_index = -EINVAL;
+       int i;
+       int ret;
+       unsigned long flags;
+
+       if (port_num > MLX4_MAX_PORTS)
+               return -EINVAL;
+
+       if (mlx4_is_bonded(ibdev->dev))
+               port_num = 1;
+
+       if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
+               return index;
+
+       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+       if (ret)
+               return ret;
+
+       if (!memcmp(&gid, &zgid, sizeof(gid)))
+               return -EINVAL;
+
+       spin_lock_irqsave(&iboe->lock, flags);
+       port_gid_table = &iboe->gids[port_num - 1];
+
+       for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
+               if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+                       ctx = port_gid_table->gids[i].ctx;
+                       break;
+               }
+       if (ctx)
+               real_index = ctx->real_index;
+       spin_unlock_irqrestore(&iboe->lock, flags);
+       return real_index;
+}
+
 static int mlx4_ib_query_device(struct ib_device *ibdev,
-                               struct ib_device_attr *props)
+                               struct ib_device_attr *props,
+                               struct ib_udata *uhw)
 {
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        int have_ib_ports;
+       struct mlx4_uverbs_ex_query_device cmd;
+       struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
+       struct mlx4_clock_params clock_params;
+
+       if (uhw->inlen) {
+               if (uhw->inlen < sizeof(cmd))
+                       return -EINVAL;
+
+               err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
+               if (err)
+                       return err;
+
+               if (cmd.comp_mask)
+                       return -EINVAL;
 
+               if (cmd.reserved)
+                       return -EINVAL;
+       }
+
+       resp.response_length = offsetof(typeof(resp), response_length) +
+               sizeof(resp.response_length);
        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
@@ -196,6 +442,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
        }
 
+       props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
        props->vendor_part_id      = dev->dev->persist->pdev->device;
@@ -208,6 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
        props->max_qp_wr           = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
        props->max_sge             = min(dev->dev->caps.max_sq_sg,
                                         dev->dev->caps.max_rq_sg);
+       props->max_sge_rd          = MLX4_MAX_SGE_RD;
        props->max_cq              = dev->dev->quotas.cq;
        props->max_cqe             = dev->dev->caps.max_cqes;
        props->max_mr              = dev->dev->quotas.mpt;
@@ -229,7 +478,25 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
+       props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
+       props->timestamp_mask = 0xFFFFFFFFFFFFULL;
 
+       if (!mlx4_is_slave(dev->dev))
+               err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
+
+       if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
+               resp.response_length += sizeof(resp.hca_core_clock_offset);
+               if (!err && !mlx4_is_slave(dev->dev)) {
+                       resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
+                       resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
+               }
+       }
+
+       if (uhw->outlen) {
+               err = ib_copy_to_udata(uhw, &resp, resp.response_length);
+               if (err)
+                       goto out;
+       }
 out:
        kfree(in_mad);
        kfree(out_mad);
@@ -375,12 +642,13 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
        props->state            = IB_PORT_DOWN;
        props->phys_state       = state_to_phys_state(props->state);
        props->active_mtu       = IB_MTU_256;
-       if (is_bonded)
-               rtnl_lock(); /* required to get upper dev */
        spin_lock_bh(&iboe->lock);
        ndev = iboe->netdevs[port - 1];
-       if (ndev && is_bonded)
-               ndev = netdev_master_upper_dev_get(ndev);
+       if (ndev && is_bonded) {
+               rcu_read_lock(); /* required to get upper dev */
+               ndev = netdev_master_upper_dev_get_rcu(ndev);
+               rcu_read_unlock();
+       }
        if (!ndev)
                goto out_unlock;
 
@@ -392,8 +660,6 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
        props->phys_state       = state_to_phys_state(props->state);
 out_unlock:
        spin_unlock_bh(&iboe->lock);
-       if (is_bonded)
-               rtnl_unlock();
 out:
        mlx4_free_cmd_mailbox(mdev->dev, mailbox);
        return err;
@@ -476,23 +742,27 @@ out:
        return err;
 }
 
-static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
-                         union ib_gid *gid)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibdev);
-
-       *gid = dev->iboe.gid_table[port - 1][index];
-
-       return 0;
-}
-
 static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
                             union ib_gid *gid)
 {
-       if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+       int ret;
+
+       if (rdma_protocol_ib(ibdev, port))
                return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
-       else
-               return iboe_query_gid(ibdev, port, index, gid);
+
+       if (!rdma_protocol_roce(ibdev, port))
+               return -ENODEV;
+
+       if (!rdma_cap_roce_gid_table(ibdev, port))
+               return -ENODEV;
+
+       ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
+       if (ret == -EAGAIN) {
+               memcpy(gid, &zgid, sizeof(*gid));
+               return 0;
+       }
+
+       return ret;
 }
 
 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@@ -653,7 +923,7 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
                resp.cqe_size         = dev->dev->caps.cqe_size;
        }
 
-       context = kmalloc(sizeof *context, GFP_KERNEL);
+       context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);
 
@@ -690,21 +960,143 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
        return 0;
 }
 
+static void  mlx4_ib_vma_open(struct vm_area_struct *area)
+{
+       /* vma_open is called when a new VMA is created on top of our VMA.
+        * This is done through either mremap flow or split_vma (usually due
+        * to mlock, madvise, munmap, etc.). We do not support a clone of the
+        * vma, as this VMA is strongly hardware related. Therefore we set the
+        * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
+        * calling us again and trying to do incorrect actions. We assume that
+        * the original vma size is exactly a single page that there will be no
+        * "splitting" operations on.
+        */
+       area->vm_ops = NULL;
+}
+
+static void  mlx4_ib_vma_close(struct vm_area_struct *area)
+{
+       struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
+
+       /* It's guaranteed that all VMAs opened on a FD are closed before the
+        * file itself is closed, therefore no sync is needed with the regular
+        * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
+        * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
+        * The close operation is usually called under mm->mmap_sem except when
+        * process is exiting.  The exiting case is handled explicitly as part
+        * of mlx4_ib_disassociate_ucontext.
+        */
+       mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
+                               area->vm_private_data;
+
+       /* set the vma context pointer to null in the mlx4_ib driver's private
+        * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
+        */
+       mlx4_ib_vma_priv_data->vma = NULL;
+}
+
+static const struct vm_operations_struct mlx4_ib_vm_ops = {
+       .open = mlx4_ib_vma_open,
+       .close = mlx4_ib_vma_close
+};
+
+static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+       int i;
+       int ret = 0;
+       struct vm_area_struct *vma;
+       struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
+       struct task_struct *owning_process  = NULL;
+       struct mm_struct   *owning_mm       = NULL;
+
+       owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+       if (!owning_process)
+               return;
+
+       owning_mm = get_task_mm(owning_process);
+       if (!owning_mm) {
+               pr_info("no mm, disassociate ucontext is pending task termination\n");
+               while (1) {
+                       /* make sure that task is dead before returning, it may
+                        * prevent a rare case of module down in parallel to a
+                        * call to mlx4_ib_vma_close.
+                        */
+                       put_task_struct(owning_process);
+                       msleep(1);
+                       owning_process = get_pid_task(ibcontext->tgid,
+                                                     PIDTYPE_PID);
+                       if (!owning_process ||
+                           owning_process->state == TASK_DEAD) {
+                               pr_info("disassociate ucontext done, task was terminated\n");
+                               /* in case task was dead need to release the task struct */
+                               if (owning_process)
+                                       put_task_struct(owning_process);
+                               return;
+                       }
+               }
+       }
+
+       /* need to protect from a race on closing the vma as part of
+        * mlx4_ib_vma_close().
+        */
+       down_read(&owning_mm->mmap_sem);
+       for (i = 0; i < HW_BAR_COUNT; i++) {
+               vma = context->hw_bar_info[i].vma;
+               if (!vma)
+                       continue;
+
+               ret = zap_vma_ptes(context->hw_bar_info[i].vma,
+                                  context->hw_bar_info[i].vma->vm_start,
+                                  PAGE_SIZE);
+               if (ret) {
+                       pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
+                       BUG_ON(1);
+               }
+
+               /* context going to be destroyed, should not access ops any more */
+               context->hw_bar_info[i].vma->vm_ops = NULL;
+       }
+
+       up_read(&owning_mm->mmap_sem);
+       mmput(owning_mm);
+       put_task_struct(owning_process);
+}
+
+static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
+                                struct mlx4_ib_vma_private_data *vma_private_data)
+{
+       vma_private_data->vma = vma;
+       vma->vm_private_data = vma_private_data;
+       vma->vm_ops =  &mlx4_ib_vm_ops;
+}
+
 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
        struct mlx4_ib_dev *dev = to_mdev(context->device);
+       struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
 
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;
 
        if (vma->vm_pgoff == 0) {
+               /* We prevent double mmaping on same context */
+               if (mucontext->hw_bar_info[HW_BAR_DB].vma)
+                       return -EINVAL;
+
                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
                if (io_remap_pfn_range(vma, vma->vm_start,
                                       to_mucontext(context)->uar.pfn,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;
+
+               mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
+
        } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
+               /* We prevent double mmaping on same context */
+               if (mucontext->hw_bar_info[HW_BAR_BF].vma)
+                       return -EINVAL;
+
                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 
                if (io_remap_pfn_range(vma, vma->vm_start,
@@ -712,8 +1104,36 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
                                       dev->dev->caps.num_uars,
                                       PAGE_SIZE, vma->vm_page_prot))
                        return -EAGAIN;
-       } else
+
+               mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
+
+       } else if (vma->vm_pgoff == 3) {
+               struct mlx4_clock_params params;
+               int ret;
+
+               /* We prevent double mmaping on same context */
+               if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
+                       return -EINVAL;
+
+               ret = mlx4_get_internal_clock_params(dev->dev, &params);
+
+               if (ret)
+                       return ret;
+
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+               if (io_remap_pfn_range(vma, vma->vm_start,
+                                      (pci_resource_start(dev->dev->persist->pdev,
+                                                          params.bar) +
+                                       params.offset)
+                                      >> PAGE_SHIFT,
+                                      PAGE_SIZE, vma->vm_page_prot))
+                       return -EAGAIN;
+
+               mlx4_ib_set_vma_data(vma,
+                                    &mucontext->hw_bar_info[HW_BAR_CLOCK]);
+       } else {
                return -EINVAL;
+       }
 
        return 0;
 }
@@ -758,6 +1178,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
                                          struct ib_udata *udata)
 {
        struct mlx4_ib_xrcd *xrcd;
+       struct ib_cq_init_attr cq_attr = {};
        int err;
 
        if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
@@ -777,7 +1198,8 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
                goto err2;
        }
 
-       xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
+       cq_attr.cqe = 1;
+       xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
        if (IS_ERR(xrcd->cq)) {
                err = PTR_ERR(xrcd->cq);
                goto err3;
@@ -827,6 +1249,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
        return 0;
 }
 
+static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
+                                         struct mlx4_ib_counters *ctr_table)
+{
+       struct counter_index *counter, *tmp_count;
+
+       mutex_lock(&ctr_table->mutex);
+       list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
+                                list) {
+               if (counter->allocated)
+                       mlx4_counter_free(ibdev->dev, counter->index);
+               list_del(&counter->list);
+               kfree(counter);
+       }
+       mutex_unlock(&ctr_table->mutex);
+}
+
 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
                   union ib_gid *gid)
 {
@@ -1090,7 +1528,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
 
        ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
                           MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
-                          MLX4_CMD_NATIVE);
+                          MLX4_CMD_WRAPPED);
        if (ret == -ENOMEM)
                pr_err("mcg table is full. Fail to register network rule.\n");
        else if (ret == -ENXIO)
@@ -1107,7 +1545,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
        int err;
        err = mlx4_cmd(dev, reg_id, 0, 0,
                       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
-                      MLX4_CMD_NATIVE);
+                      MLX4_CMD_WRAPPED);
        if (err)
                pr_err("Fail to detach network rule. registration id = 0x%llx\n",
                       reg_id);
@@ -1185,7 +1623,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
                                            &mflow->reg_id[i].id);
                if (err)
                        goto err_create_flow;
-               i++;
                if (is_bonded) {
                        /* Application always sees one port so the mirror rule
                         * must be on port #2
@@ -1200,6 +1637,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
                        j++;
                }
 
+               i++;
        }
 
        if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
@@ -1207,7 +1645,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
                                               &mflow->reg_id[i].id);
                if (err)
                        goto err_create_flow;
-               i++;
+
                if (is_bonded) {
                        flow_attr->port = 2;
                        err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
@@ -1218,6 +1656,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
                        j++;
                }
                /* function to create mirror rule */
+               i++;
        }
 
        return &mflow->ibflow;
@@ -1489,272 +1928,6 @@ static struct device_attribute *mlx4_class_attributes[] = {
        &dev_attr_board_id
 };
 
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
-                                    struct net_device *dev)
-{
-       memcpy(eui, dev->dev_addr, 3);
-       memcpy(eui + 5, dev->dev_addr + 3, 3);
-       if (vlan_id < 0x1000) {
-               eui[3] = vlan_id >> 8;
-               eui[4] = vlan_id & 0xff;
-       } else {
-               eui[3] = 0xff;
-               eui[4] = 0xfe;
-       }
-       eui[0] ^= 2;
-}
-
-static void update_gids_task(struct work_struct *work)
-{
-       struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
-       struct mlx4_cmd_mailbox *mailbox;
-       union ib_gid *gids;
-       int err;
-       struct mlx4_dev *dev = gw->dev->dev;
-       int is_bonded = mlx4_is_bonded(dev);
-
-       if (!gw->dev->ib_active)
-               return;
-
-       mailbox = mlx4_alloc_cmd_mailbox(dev);
-       if (IS_ERR(mailbox)) {
-               pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
-               return;
-       }
-
-       gids = mailbox->buf;
-       memcpy(gids, gw->gids, sizeof gw->gids);
-
-       err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
-                      MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
-                      MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
-       if (err)
-               pr_warn("set port command failed\n");
-       else
-               if ((gw->port == 1) || !is_bonded)
-                       mlx4_ib_dispatch_event(gw->dev,
-                                              is_bonded ? 1 : gw->port,
-                                              IB_EVENT_GID_CHANGE);
-
-       mlx4_free_cmd_mailbox(dev, mailbox);
-       kfree(gw);
-}
-
-static void reset_gids_task(struct work_struct *work)
-{
-       struct update_gid_work *gw =
-                       container_of(work, struct update_gid_work, work);
-       struct mlx4_cmd_mailbox *mailbox;
-       union ib_gid *gids;
-       int err;
-       struct mlx4_dev *dev = gw->dev->dev;
-
-       if (!gw->dev->ib_active)
-               return;
-
-       mailbox = mlx4_alloc_cmd_mailbox(dev);
-       if (IS_ERR(mailbox)) {
-               pr_warn("reset gid table failed\n");
-               goto free;
-       }
-
-       gids = mailbox->buf;
-       memcpy(gids, gw->gids, sizeof(gw->gids));
-
-       if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
-                                   IB_LINK_LAYER_ETHERNET) {
-               err = mlx4_cmd(dev, mailbox->dma,
-                              MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
-                              MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
-                              MLX4_CMD_TIME_CLASS_B,
-                              MLX4_CMD_WRAPPED);
-               if (err)
-                       pr_warn("set port %d command failed\n", gw->port);
-       }
-
-       mlx4_free_cmd_mailbox(dev, mailbox);
-free:
-       kfree(gw);
-}
-
-static int update_gid_table(struct mlx4_ib_dev *dev, int port,
-                           union ib_gid *gid, int clear,
-                           int default_gid)
-{
-       struct update_gid_work *work;
-       int i;
-       int need_update = 0;
-       int free = -1;
-       int found = -1;
-       int max_gids;
-
-       if (default_gid) {
-               free = 0;
-       } else {
-               max_gids = dev->dev->caps.gid_table_len[port];
-               for (i = 1; i < max_gids; ++i) {
-                       if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
-                                   sizeof(*gid)))
-                               found = i;
-
-                       if (clear) {
-                               if (found >= 0) {
-                                       need_update = 1;
-                                       dev->iboe.gid_table[port - 1][found] =
-                                               zgid;
-                                       break;
-                               }
-                       } else {
-                               if (found >= 0)
-                                       break;
-
-                               if (free < 0 &&
-                                   !memcmp(&dev->iboe.gid_table[port - 1][i],
-                                           &zgid, sizeof(*gid)))
-                                       free = i;
-                       }
-               }
-       }
-
-       if (found == -1 && !clear && free >= 0) {
-               dev->iboe.gid_table[port - 1][free] = *gid;
-               need_update = 1;
-       }
-
-       if (!need_update)
-               return 0;
-
-       work = kzalloc(sizeof(*work), GFP_ATOMIC);
-       if (!work)
-               return -ENOMEM;
-
-       memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
-       INIT_WORK(&work->work, update_gids_task);
-       work->port = port;
-       work->dev = dev;
-       queue_work(wq, &work->work);
-
-       return 0;
-}
-
-static void mlx4_make_default_gid(struct  net_device *dev, union ib_gid *gid)
-{
-       gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
-       mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-
-static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
-{
-       struct update_gid_work *work;
-
-       work = kzalloc(sizeof(*work), GFP_ATOMIC);
-       if (!work)
-               return -ENOMEM;
-
-       memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
-       memset(work->gids, 0, sizeof(work->gids));
-       INIT_WORK(&work->work, reset_gids_task);
-       work->dev = dev;
-       work->port = port;
-       queue_work(wq, &work->work);
-       return 0;
-}
-
-static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
-                             struct mlx4_ib_dev *ibdev, union ib_gid *gid)
-{
-       struct mlx4_ib_iboe *iboe;
-       int port = 0;
-       struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
-                               rdma_vlan_dev_real_dev(event_netdev) :
-                               event_netdev;
-       union ib_gid default_gid;
-
-       mlx4_make_default_gid(real_dev, &default_gid);
-
-       if (!memcmp(gid, &default_gid, sizeof(*gid)))
-               return 0;
-
-       if (event != NETDEV_DOWN && event != NETDEV_UP)
-               return 0;
-
-       if ((real_dev != event_netdev) &&
-           (event == NETDEV_DOWN) &&
-           rdma_link_local_addr((struct in6_addr *)gid))
-               return 0;
-
-       iboe = &ibdev->iboe;
-       spin_lock_bh(&iboe->lock);
-
-       for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
-               if ((netif_is_bond_master(real_dev) &&
-                    (real_dev == iboe->masters[port - 1])) ||
-                    (!netif_is_bond_master(real_dev) &&
-                    (real_dev == iboe->netdevs[port - 1])))
-                       update_gid_table(ibdev, port, gid,
-                                        event == NETDEV_DOWN, 0);
-
-       spin_unlock_bh(&iboe->lock);
-       return 0;
-
-}
-
-static u8 mlx4_ib_get_dev_port(struct net_device *dev,
-                              struct mlx4_ib_dev *ibdev)
-{
-       u8 port = 0;
-       struct mlx4_ib_iboe *iboe;
-       struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
-                               rdma_vlan_dev_real_dev(dev) : dev;
-
-       iboe = &ibdev->iboe;
-
-       for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
-               if ((netif_is_bond_master(real_dev) &&
-                    (real_dev == iboe->masters[port - 1])) ||
-                    (!netif_is_bond_master(real_dev) &&
-                    (real_dev == iboe->netdevs[port - 1])))
-                       break;
-
-       if ((port == 0) || (port > ibdev->dev->caps.num_ports))
-               return 0;
-       else
-               return port;
-}
-
-static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
-                               void *ptr)
-{
-       struct mlx4_ib_dev *ibdev;
-       struct in_ifaddr *ifa = ptr;
-       union ib_gid gid;
-       struct net_device *event_netdev = ifa->ifa_dev->dev;
-
-       ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
-
-       ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
-
-       mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
-       return NOTIFY_DONE;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
-                               void *ptr)
-{
-       struct mlx4_ib_dev *ibdev;
-       struct inet6_ifaddr *ifa = ptr;
-       union  ib_gid *gid = (union ib_gid *)&ifa->addr;
-       struct net_device *event_netdev = ifa->idev->dev;
-
-       ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
-
-       mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
-       return NOTIFY_DONE;
-}
-#endif
-
 #define MLX4_IB_INVALID_MAC    ((u64)-1)
 static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
                               struct net_device *dev,
@@ -1813,94 +1986,6 @@ unlock:
        mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
 }
 
-static void mlx4_ib_get_dev_addr(struct net_device *dev,
-                                struct mlx4_ib_dev *ibdev, u8 port)
-{
-       struct in_device *in_dev;
-#if IS_ENABLED(CONFIG_IPV6)
-       struct inet6_dev *in6_dev;
-       union ib_gid  *pgid;
-       struct inet6_ifaddr *ifp;
-       union ib_gid default_gid;
-#endif
-       union ib_gid gid;
-
-
-       if ((port == 0) || (port > ibdev->dev->caps.num_ports))
-               return;
-
-       /* IPv4 gids */
-       in_dev = in_dev_get(dev);
-       if (in_dev) {
-               for_ifa(in_dev) {
-                       /*ifa->ifa_address;*/
-                       ipv6_addr_set_v4mapped(ifa->ifa_address,
-                                              (struct in6_addr *)&gid);
-                       update_gid_table(ibdev, port, &gid, 0, 0);
-               }
-               endfor_ifa(in_dev);
-               in_dev_put(in_dev);
-       }
-#if IS_ENABLED(CONFIG_IPV6)
-       mlx4_make_default_gid(dev, &default_gid);
-       /* IPv6 gids */
-       in6_dev = in6_dev_get(dev);
-       if (in6_dev) {
-               read_lock_bh(&in6_dev->lock);
-               list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
-                       pgid = (union ib_gid *)&ifp->addr;
-                       if (!memcmp(pgid, &default_gid, sizeof(*pgid)))
-                               continue;
-                       update_gid_table(ibdev, port, pgid, 0, 0);
-               }
-               read_unlock_bh(&in6_dev->lock);
-               in6_dev_put(in6_dev);
-       }
-#endif
-}
-
-static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
-                                struct  net_device *dev, u8 port)
-{
-       union ib_gid gid;
-       mlx4_make_default_gid(dev, &gid);
-       update_gid_table(ibdev, port, &gid, 0, 1);
-}
-
-static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
-{
-       struct  net_device *dev;
-       struct mlx4_ib_iboe *iboe = &ibdev->iboe;
-       int i;
-       int err = 0;
-
-       for (i = 1; i <= ibdev->num_ports; ++i) {
-               if (rdma_port_get_link_layer(&ibdev->ib_dev, i) ==
-                   IB_LINK_LAYER_ETHERNET) {
-                       err = reset_gid_table(ibdev, i);
-                       if (err)
-                               goto out;
-               }
-       }
-
-       read_lock(&dev_base_lock);
-       spin_lock_bh(&iboe->lock);
-
-       for_each_netdev(&init_net, dev) {
-               u8 port = mlx4_ib_get_dev_port(dev, ibdev);
-               /* port will be non-zero only for ETH ports */
-               if (port) {
-                       mlx4_ib_set_default_gid(ibdev, dev, port);
-                       mlx4_ib_get_dev_addr(dev, ibdev, port);
-               }
-       }
-
-       spin_unlock_bh(&iboe->lock);
-       read_unlock(&dev_base_lock);
-out:
-       return err;
-}
-
 static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
                                 struct net_device *dev,
                                 unsigned long event)
@@ -1910,81 +1995,22 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
        int update_qps_port = -1;
        int port;
 
+       ASSERT_RTNL();
+
        iboe = &ibdev->iboe;
 
        spin_lock_bh(&iboe->lock);
        mlx4_foreach_ib_transport_port(port, ibdev->dev) {
-               enum ib_port_state      port_state = IB_PORT_NOP;
-               struct net_device *old_master = iboe->masters[port - 1];
-               struct net_device *curr_netdev;
-               struct net_device *curr_master;
 
                iboe->netdevs[port - 1] =
                        mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
-               if (iboe->netdevs[port - 1])
-                       mlx4_ib_set_default_gid(ibdev,
-                                               iboe->netdevs[port - 1], port);
-               curr_netdev = iboe->netdevs[port - 1];
-
-               if (iboe->netdevs[port - 1] &&
-                   netif_is_bond_slave(iboe->netdevs[port - 1])) {
-                       iboe->masters[port - 1] = netdev_master_upper_dev_get(
-                               iboe->netdevs[port - 1]);
-               } else {
-                       iboe->masters[port - 1] = NULL;
-               }
-               curr_master = iboe->masters[port - 1];
 
                if (dev == iboe->netdevs[port - 1] &&
                    (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
                     event == NETDEV_UP || event == NETDEV_CHANGE))
                        update_qps_port = port;
 
-               if (curr_netdev) {
-                       port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
-                                               IB_PORT_ACTIVE : IB_PORT_DOWN;
-                       mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
-                       if (curr_master) {
-                               /* if using bonding/team and a slave port is down, we
-                                * don't want the bond IP based gids in the table since
-                                * flows that select port by gid may get the down port.
-                               */
-                               if (port_state == IB_PORT_DOWN &&
-                                   !mlx4_is_bonded(ibdev->dev)) {
-                                       reset_gid_table(ibdev, port);
-                                       mlx4_ib_set_default_gid(ibdev,
-                                                               curr_netdev,
-                                                               port);
-                               } else {
-                                       /* gids from the upper dev (bond/team)
-                                        * should appear in port's gid table
-                                       */
-                                       mlx4_ib_get_dev_addr(curr_master,
-                                                            ibdev, port);
-                               }
-                       }
-                       /* if bonding is used it is possible that we add it to
-                        * masters only after IP address is assigned to the
-                        * net bonding interface.
-                       */
-                       if (curr_master && (old_master != curr_master)) {
-                               reset_gid_table(ibdev, port);
-                               mlx4_ib_set_default_gid(ibdev,
-                                                       curr_netdev, port);
-                               mlx4_ib_get_dev_addr(curr_master, ibdev, port);
-                       }
-
-                       if (!curr_master && (old_master != curr_master)) {
-                               reset_gid_table(ibdev, port);
-                               mlx4_ib_set_default_gid(ibdev,
-                                                       curr_netdev, port);
-                               mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
-                       }
-               } else {
-                       reset_gid_table(ibdev, port);
-               }
        }
-
        spin_unlock_bh(&iboe->lock);
 
        if (update_qps_port > 0)
@@ -2041,77 +2067,75 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
 
 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 {
-       char name[80];
-       int eq_per_port = 0;
-       int added_eqs = 0;
-       int total_eqs = 0;
-       int i, j, eq;
+       int i, j, eq = 0, total_eqs = 0;
 
-       /* Legacy mode or comp_pool is not large enough */
-       if (dev->caps.comp_pool == 0 ||
-           dev->caps.num_ports > dev->caps.comp_pool)
-               return;
-
-       eq_per_port = dev->caps.comp_pool / dev->caps.num_ports;
-
-       /* Init eq table */
-       added_eqs = 0;
-       mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
-               added_eqs += eq_per_port;
-
-       total_eqs = dev->caps.num_comp_vectors + added_eqs;
-
-       ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
+       ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
+                                 sizeof(ibdev->eq_table[0]), GFP_KERNEL);
        if (!ibdev->eq_table)
                return;
 
-       ibdev->eq_added = added_eqs;
-
-       eq = 0;
-       mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
-               for (j = 0; j < eq_per_port; j++) {
-                       snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
-                                i, j, dev->persist->pdev->bus->name);
-                       /* Set IRQ for specific name (per ring) */
-                       if (mlx4_assign_eq(dev, name, NULL,
-                                          &ibdev->eq_table[eq])) {
-                               /* Use legacy (same as mlx4_en driver) */
-                               pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
-                               ibdev->eq_table[eq] =
-                                       (eq % dev->caps.num_comp_vectors);
-                       }
-                       eq++;
+       for (i = 1; i <= dev->caps.num_ports; i++) {
+               for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
+                    j++, total_eqs++) {
+                       if (i > 1 &&  mlx4_is_eq_shared(dev, total_eqs))
+                               continue;
+                       ibdev->eq_table[eq] = total_eqs;
+                       if (!mlx4_assign_eq(dev, i,
+                                           &ibdev->eq_table[eq]))
+                               eq++;
+                       else
+                               ibdev->eq_table[eq] = -1;
                }
        }
 
-       /* Fill the reset of the vector with legacy EQ */
-       for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
-               ibdev->eq_table[eq++] = i;
+       for (i = eq; i < dev->caps.num_comp_vectors;
+            ibdev->eq_table[i++] = -1)
+               ;
 
        /* Advertise the new number of EQs to clients */
-       ibdev->ib_dev.num_comp_vectors = total_eqs;
+       ibdev->ib_dev.num_comp_vectors = eq;
 }
 
 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 {
        int i;
+       int total_eqs = ibdev->ib_dev.num_comp_vectors;
 
-       /* no additional eqs were added */
+       /* no eqs were allocated */
        if (!ibdev->eq_table)
                return;
 
        /* Reset the advertised EQ number */
-       ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
+       ibdev->ib_dev.num_comp_vectors = 0;
 
-       /* Free only the added eqs */
-       for (i = 0; i < ibdev->eq_added; i++) {
-               /* Don't free legacy eqs if used */
-               if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
-                       continue;
+       for (i = 0; i < total_eqs; i++)
                mlx4_release_eq(dev, ibdev->eq_table[i]);
-       }
 
        kfree(ibdev->eq_table);
+       ibdev->eq_table = NULL;
+}
+
+static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
+                              struct ib_port_immutable *immutable)
+{
+       struct ib_port_attr attr;
+       int err;
+
+       err = mlx4_ib_query_port(ibdev, port_num, &attr);
+       if (err)
+               return err;
+
+       immutable->pkey_tbl_len = attr.pkey_tbl_len;
+       immutable->gid_tbl_len = attr.gid_tbl_len;
+
+       if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+               immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+       else
+               immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+       return 0;
 }
 
 static void *mlx4_ib_add(struct mlx4_dev *dev)
@@ -2123,6 +2147,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        struct mlx4_ib_iboe *iboe;
        int ib_num_ports = 0;
        int num_req_counters;
+       int allocated;
+       u32 counter_index;
+       struct counter_index *new_counter_index = NULL;
 
        pr_info_once("%s", mlx4_ib_version);
 
@@ -2167,6 +2194,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                                                1 : ibdev->num_ports;
        ibdev->ib_dev.num_comp_vectors  = dev->caps.num_comp_vectors;
        ibdev->ib_dev.dma_device        = &dev->persist->pdev->dev;
+       ibdev->ib_dev.get_netdev        = mlx4_ib_get_netdev;
+       ibdev->ib_dev.add_gid           = mlx4_ib_add_gid;
+       ibdev->ib_dev.del_gid           = mlx4_ib_del_gid;
 
        if (dev->caps.userspace_caps)
                ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2235,12 +2265,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.reg_user_mr       = mlx4_ib_reg_user_mr;
        ibdev->ib_dev.rereg_user_mr     = mlx4_ib_rereg_user_mr;
        ibdev->ib_dev.dereg_mr          = mlx4_ib_dereg_mr;
-       ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
-       ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
-       ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
+       ibdev->ib_dev.alloc_mr          = mlx4_ib_alloc_mr;
+       ibdev->ib_dev.map_mr_sg         = mlx4_ib_map_mr_sg;
        ibdev->ib_dev.attach_mcast      = mlx4_ib_mcg_attach;
        ibdev->ib_dev.detach_mcast      = mlx4_ib_mcg_detach;
        ibdev->ib_dev.process_mad       = mlx4_ib_process_mad;
+       ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
+       ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
 
        if (!mlx4_is_slave(ibdev->dev)) {
                ibdev->ib_dev.alloc_fmr         = mlx4_ib_fmr_alloc;
@@ -2278,6 +2309,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                        (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
        }
 
+       ibdev->ib_dev.uverbs_ex_cmd_mask |=
+               (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+
        mlx4_ib_alloc_eqs(dev, ibdev);
 
        spin_lock_init(&iboe->lock);
@@ -2285,22 +2321,58 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (init_node_data(ibdev))
                goto err_map;
 
+       for (i = 0; i < ibdev->num_ports; ++i) {
+               mutex_init(&ibdev->counters_table[i].mutex);
+               INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+       }
+
        num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
        for (i = 0; i < num_req_counters; ++i) {
                mutex_init(&ibdev->qp1_proxy_lock[i]);
+               allocated = 0;
                if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
                                                IB_LINK_LAYER_ETHERNET) {
-                       err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
+                       err = mlx4_counter_alloc(ibdev->dev, &counter_index);
+                       /* if failed to allocate a new counter, use default */
                        if (err)
-                               ibdev->counters[i] = -1;
-               } else {
-                       ibdev->counters[i] = -1;
+                               counter_index =
+                                       mlx4_get_default_counter_index(dev,
+                                                                      i + 1);
+                       else
+                               allocated = 1;
+               } else { /* IB_LINK_LAYER_INFINIBAND use the default counter */
+                       counter_index = mlx4_get_default_counter_index(dev,
+                                                                      i + 1);
                }
+               new_counter_index = kmalloc(sizeof(*new_counter_index),
+                                           GFP_KERNEL);
+               if (!new_counter_index) {
+                       if (allocated)
+                               mlx4_counter_free(ibdev->dev, counter_index);
+                       goto err_counter;
+               }
+               new_counter_index->index = counter_index;
+               new_counter_index->allocated = allocated;
+               list_add_tail(&new_counter_index->list,
+                             &ibdev->counters_table[i].counters_list);
+               ibdev->counters_table[i].default_counter = counter_index;
+               pr_info("counter index %d for port %d allocated %d\n",
+                       counter_index, i + 1, allocated);
        }
        if (mlx4_is_bonded(dev))
-               for (i = 1; i < ibdev->num_ports ; ++i)
-                       ibdev->counters[i] = ibdev->counters[0];
-
+               for (i = 1; i < ibdev->num_ports ; ++i) {
+                       new_counter_index =
+                                       kmalloc(sizeof(struct counter_index),
+                                               GFP_KERNEL);
+                       if (!new_counter_index)
+                               goto err_counter;
+                       new_counter_index->index = counter_index;
+                       new_counter_index->allocated = 0;
+                       list_add_tail(&new_counter_index->list,
+                                     &ibdev->counters_table[i].counters_list);
+                       ibdev->counters_table[i].default_counter =
+                                                               counter_index;
+               }
 
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
                ib_num_ports++;
@@ -2360,26 +2432,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                                goto err_notif;
                        }
                }
-               if (!iboe->nb_inet.notifier_call) {
-                       iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
-                       err = register_inetaddr_notifier(&iboe->nb_inet);
-                       if (err) {
-                               iboe->nb_inet.notifier_call = NULL;
-                               goto err_notif;
-                       }
-               }
-#if IS_ENABLED(CONFIG_IPV6)
-               if (!iboe->nb_inet6.notifier_call) {
-                       iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
-                       err = register_inet6addr_notifier(&iboe->nb_inet6);
-                       if (err) {
-                               iboe->nb_inet6.notifier_call = NULL;
-                               goto err_notif;
-                       }
-               }
-#endif
-               if (mlx4_ib_init_gid_table(ibdev))
-                       goto err_notif;
        }
 
        for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -2410,18 +2462,6 @@ err_notif:
                        pr_warn("failure unregistering notifier\n");
                ibdev->iboe.nb.notifier_call = NULL;
        }
-       if (ibdev->iboe.nb_inet.notifier_call) {
-               if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
-                       pr_warn("failure unregistering notifier\n");
-               ibdev->iboe.nb_inet.notifier_call = NULL;
-       }
-#if IS_ENABLED(CONFIG_IPV6)
-       if (ibdev->iboe.nb_inet6.notifier_call) {
-               if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
-                       pr_warn("failure unregistering notifier\n");
-               ibdev->iboe.nb_inet6.notifier_call = NULL;
-       }
-#endif
        flush_workqueue(wq);
 
        mlx4_ib_close_sriov(ibdev);
@@ -2440,9 +2480,8 @@ err_steer_qp_release:
                mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
                                      ibdev->steer_qpn_count);
 err_counter:
-       for (; i; --i)
-               if (ibdev->counters[i - 1] != -1)
-                       mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]);
+       for (i = 0; i < ibdev->num_ports; ++i)
+               mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
 
 err_map:
        iounmap(ibdev->uar_map);
@@ -2545,23 +2584,10 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
                kfree(ibdev->ib_uc_qpns_bitmap);
        }
 
-       if (ibdev->iboe.nb_inet.notifier_call) {
-               if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
-                       pr_warn("failure unregistering notifier\n");
-               ibdev->iboe.nb_inet.notifier_call = NULL;
-       }
-#if IS_ENABLED(CONFIG_IPV6)
-       if (ibdev->iboe.nb_inet6.notifier_call) {
-               if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
-                       pr_warn("failure unregistering notifier\n");
-               ibdev->iboe.nb_inet6.notifier_call = NULL;
-       }
-#endif
-
        iounmap(ibdev->uar_map);
        for (p = 0; p < ibdev->num_ports; ++p)
-               if (ibdev->counters[p] != -1)
-                       mlx4_counter_free(ibdev->dev, ibdev->counters[p]);
+               mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
+
        mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
                mlx4_CLOSE_PORT(dev, p);
 
@@ -2592,31 +2618,33 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
        dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
        if (!dm) {
                pr_err("failed to allocate memory for tunneling qp update\n");
-               goto out;
+               return;
        }
 
        for (i = 0; i < ports; i++) {
                dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
                if (!dm[i]) {
                        pr_err("failed to allocate memory for tunneling qp update work struct\n");
-                       for (i = 0; i < dev->caps.num_ports; i++) {
-                               if (dm[i])
-                                       kfree(dm[i]);
-                       }
+                       while (--i >= 0)
+                               kfree(dm[i]);
                        goto out;
                }
-       }
-       /* initialize or tear down tunnel QPs for the slave */
-       for (i = 0; i < ports; i++) {
                INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
                dm[i]->port = first_port + i + 1;
                dm[i]->slave = slave;
                dm[i]->do_init = do_init;
                dm[i]->dev = ibdev;
-               spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
-               if (!ibdev->sriov.is_going_down)
+       }
+       /* initialize or tear down tunnel QPs for the slave */
+       spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
+       if (!ibdev->sriov.is_going_down) {
+               for (i = 0; i < ports; i++)
                        queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
                spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+       } else {
+               spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+               for (i = 0; i < ports; i++)
+                       kfree(dm[i]);
        }
 out:
        kfree(dm);