These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / drivers / net / ethernet / mellanox / mlx5 / core / main.c
index 28425e5..6cf6d93 100644 (file)
@@ -30,7 +30,7 @@
  * SOFTWARE.
  */
 
-#include <asm-generic/kmap_types.h>
+#include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/io-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
 #include <linux/kmod.h>
+#include <linux/delay.h>
 #include <linux/mlx5/mlx5_ifc.h>
 #include "mlx5_core.h"
 
-#define DRIVER_NAME "mlx5_core"
-#define DRIVER_VERSION "3.0"
-#define DRIVER_RELDATE  "January 2015"
-
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -65,7 +64,6 @@ static int prof_sel = MLX5_DEFAULT_PROF;
 module_param_named(prof_sel, prof_sel, int, 0444);
 MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
 
-struct workqueue_struct *mlx5_core_wq;
 static LIST_HEAD(intf_list);
 static LIST_HEAD(dev_list);
 static DEFINE_MUTEX(intf_mutex);
@@ -155,6 +153,25 @@ static struct mlx5_profile profile[] = {
        },
 };
 
+#define FW_INIT_TIMEOUT_MILI   2000    /* ms: limit given to wait_fw_init() */
+#define FW_INIT_WAIT_MS                2       /* ms: msleep() between polls */
+
+/* Sleep-poll until fw_initializing() clears; returns 0 when it does, or
+ * -EBUSY once more than max_wait_mili milliseconds have elapsed.
+ */
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+{
+       unsigned long deadline = jiffies + msecs_to_jiffies(max_wait_mili);
+
+       while (fw_initializing(dev)) {
+               if (time_after(jiffies, deadline))
+                       return -EBUSY;
+               msleep(FW_INIT_WAIT_MS);
+       }
+
+       return 0;
+}
+
 static int set_dma_caps(struct pci_dev *pdev)
 {
        int err;
@@ -185,6 +202,34 @@ static int set_dma_caps(struct pci_dev *pdev)
        return err;
 }
 
+/* Enable the PCI device if pci_status is MLX5_PCI_STATUS_DISABLED, under
+ * pci_status_mutex; returns pci_enable_device()'s result, else 0. */
+static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
+{
+       int ret = 0;
+
+       mutex_lock(&dev->pci_status_mutex);
+       if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
+               ret = pci_enable_device(dev->pdev);
+               if (!ret)
+                       dev->pci_status = MLX5_PCI_STATUS_ENABLED;
+       }
+       mutex_unlock(&dev->pci_status_mutex);
+       return ret;
+}
+
+/* Disable the PCI device if pci_status is MLX5_PCI_STATUS_ENABLED, under
+ * pci_status_mutex; does nothing otherwise. */
+static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
+{
+       mutex_lock(&dev->pci_status_mutex);
+       if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
+               pci_disable_device(dev->pdev);
+               dev->pci_status = MLX5_PCI_STATUS_DISABLED;
+       }
+       mutex_unlock(&dev->pci_status_mutex);
+}
+
 static int request_bar(struct pci_dev *pdev)
 {
        int err = 0;
@@ -208,24 +253,28 @@ static void release_bar(struct pci_dev *pdev)
 
 static int mlx5_enable_msix(struct mlx5_core_dev *dev)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
-       int num_eqs = 1 << dev->caps.gen.log_max_eq;
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_eq_table *table = &priv->eq_table;
+       int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
        int nvec;
        int i;
 
-       nvec = dev->caps.gen.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE;
+       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+              MLX5_EQ_VEC_COMP_BASE;
        nvec = min_t(int, nvec, num_eqs);
        if (nvec <= MLX5_EQ_VEC_COMP_BASE)
                return -ENOMEM;
 
-       table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL);
-       if (!table->msix_arr)
-               return -ENOMEM;
+       priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL);
+
+       priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
+       if (!priv->msix_arr || !priv->irq_info)
+               goto err_free_msix;
 
        for (i = 0; i < nvec; i++)
-               table->msix_arr[i].entry = i;
+               priv->msix_arr[i].entry = i;
 
-       nvec = pci_enable_msix_range(dev->pdev, table->msix_arr,
+       nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
                                     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
        if (nvec < 0)
                return nvec;
@@ -233,14 +282,20 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev)
        table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
 
        return 0;
+
+err_free_msix:
+       kfree(priv->irq_info);
+       kfree(priv->msix_arr);
+       return -ENOMEM;
 }
 
 static void mlx5_disable_msix(struct mlx5_core_dev *dev)
 {
-       struct mlx5_eq_table *table = &dev->priv.eq_table;
+       struct mlx5_priv *priv = &dev->priv;
 
        pci_disable_msix(dev->pdev);
-       kfree(table->msix_arr);
+       kfree(priv->irq_info);  /* both arrays come from mlx5_enable_msix() */
+       kfree(priv->msix_arr);
 }
 
 struct mlx5_reg_host_endianess {
@@ -277,98 +332,20 @@ static u16 to_fw_pkey_sz(u32 size)
        }
 }
 
-/* selectively copy writable fields clearing any reserved area
- */
-static void copy_rw_fields(void *to, struct mlx5_caps *from)
-{
-       __be64 *flags_off = (__be64 *)MLX5_ADDR_OF(cmd_hca_cap, to, reserved_22);
-       u64 v64;
-
-       MLX5_SET(cmd_hca_cap, to, log_max_qp, from->gen.log_max_qp);
-       MLX5_SET(cmd_hca_cap, to, log_max_ra_req_qp, from->gen.log_max_ra_req_qp);
-       MLX5_SET(cmd_hca_cap, to, log_max_ra_res_qp, from->gen.log_max_ra_res_qp);
-       MLX5_SET(cmd_hca_cap, to, pkey_table_size, from->gen.pkey_table_size);
-       MLX5_SET(cmd_hca_cap, to, pkey_table_size, to_fw_pkey_sz(from->gen.pkey_table_size));
-       MLX5_SET(cmd_hca_cap, to, log_uar_page_sz, PAGE_SHIFT - 12);
-       v64 = from->gen.flags & MLX5_CAP_BITS_RW_MASK;
-       *flags_off = cpu_to_be64(v64);
-}
-
-static u16 get_pkey_table_size(int pkey)
-{
-       if (pkey > MLX5_MAX_LOG_PKEY_TABLE)
-               return 0;
-
-       return MLX5_MIN_PKEY_TABLE_SIZE << pkey;
-}
-
-static void fw2drv_caps(struct mlx5_caps *caps, void *out)
-{
-       struct mlx5_general_caps *gen = &caps->gen;
-
-       gen->max_srq_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_srq_sz);
-       gen->max_wqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_qp_sz);
-       gen->log_max_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_qp);
-       gen->log_max_strq = MLX5_GET_PR(cmd_hca_cap, out, log_max_strq_sz);
-       gen->log_max_srq = MLX5_GET_PR(cmd_hca_cap, out, log_max_srqs);
-       gen->max_cqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_cq_sz);
-       gen->log_max_cq = MLX5_GET_PR(cmd_hca_cap, out, log_max_cq);
-       gen->max_eqes = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_max_eq_sz);
-       gen->log_max_mkey = MLX5_GET_PR(cmd_hca_cap, out, log_max_mkey);
-       gen->log_max_eq = MLX5_GET_PR(cmd_hca_cap, out, log_max_eq);
-       gen->max_indirection = MLX5_GET_PR(cmd_hca_cap, out, max_indirection);
-       gen->log_max_mrw_sz = MLX5_GET_PR(cmd_hca_cap, out, log_max_mrw_sz);
-       gen->log_max_bsf_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_bsf_list_size);
-       gen->log_max_klm_list_size = MLX5_GET_PR(cmd_hca_cap, out, log_max_klm_list_size);
-       gen->log_max_ra_req_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_dc);
-       gen->log_max_ra_res_dc = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_dc);
-       gen->log_max_ra_req_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_req_qp);
-       gen->log_max_ra_res_qp = MLX5_GET_PR(cmd_hca_cap, out, log_max_ra_res_qp);
-       gen->max_qp_counters = MLX5_GET_PR(cmd_hca_cap, out, max_qp_cnt);
-       gen->pkey_table_size = get_pkey_table_size(MLX5_GET_PR(cmd_hca_cap, out, pkey_table_size));
-       gen->local_ca_ack_delay = MLX5_GET_PR(cmd_hca_cap, out, local_ca_ack_delay);
-       gen->num_ports = MLX5_GET_PR(cmd_hca_cap, out, num_ports);
-       gen->log_max_msg = MLX5_GET_PR(cmd_hca_cap, out, log_max_msg);
-       gen->stat_rate_support = MLX5_GET_PR(cmd_hca_cap, out, stat_rate_support);
-       gen->flags = be64_to_cpu(*(__be64 *)MLX5_ADDR_OF(cmd_hca_cap, out, reserved_22));
-       pr_debug("flags = 0x%llx\n", gen->flags);
-       gen->uar_sz = MLX5_GET_PR(cmd_hca_cap, out, uar_sz);
-       gen->min_log_pg_sz = MLX5_GET_PR(cmd_hca_cap, out, log_pg_sz);
-       gen->bf_reg_size = MLX5_GET_PR(cmd_hca_cap, out, bf);
-       gen->bf_reg_size = 1 << MLX5_GET_PR(cmd_hca_cap, out, log_bf_reg_size);
-       gen->max_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq);
-       gen->max_rq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_rq);
-       gen->max_dc_sq_desc_sz = MLX5_GET_PR(cmd_hca_cap, out, max_wqe_sz_sq_dc);
-       gen->max_qp_mcg = MLX5_GET_PR(cmd_hca_cap, out, max_qp_mcg);
-       gen->log_max_pd = MLX5_GET_PR(cmd_hca_cap, out, log_max_pd);
-       gen->log_max_xrcd = MLX5_GET_PR(cmd_hca_cap, out, log_max_xrcd);
-       gen->log_uar_page_sz = MLX5_GET_PR(cmd_hca_cap, out, log_uar_page_sz);
-}
-
-static const char *caps_opmod_str(u16 opmod)
-{
-       switch (opmod) {
-       case HCA_CAP_OPMOD_GET_MAX:
-               return "GET_MAX";
-       case HCA_CAP_OPMOD_GET_CUR:
-               return "GET_CUR";
-       default:
-               return "Invalid";
-       }
-}
-
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
-                      u16 opmod)
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
+                      enum mlx5_cap_mode cap_mode)
 {
        u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
        int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
-       void *out;
+       void *out, *hca_caps;
+       u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
        int err;
 
        memset(in, 0, sizeof(in));
        out = kzalloc(out_sz, GFP_KERNEL);
        if (!out)
                return -ENOMEM;
+
        MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
        MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
        err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
@@ -377,12 +354,30 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, struct mlx5_caps *caps,
 
        err = mlx5_cmd_status_to_err_v2(out);
        if (err) {
-               mlx5_core_warn(dev, "query max hca cap failed, %d\n", err);
+               mlx5_core_warn(dev,
+                              "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
+                              cap_type, cap_mode, err);
                goto query_ex;
        }
-       mlx5_core_dbg(dev, "%s\n", caps_opmod_str(opmod));
-       fw2drv_caps(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct));
 
+       hca_caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+
+       switch (cap_mode) {
+       case HCA_CAP_OPMOD_GET_MAX:
+               memcpy(dev->hca_caps_max[cap_type], hca_caps,
+                      MLX5_UN_SZ_BYTES(hca_cap_union));
+               break;
+       case HCA_CAP_OPMOD_GET_CUR:
+               memcpy(dev->hca_caps_cur[cap_type], hca_caps,
+                      MLX5_UN_SZ_BYTES(hca_cap_union));
+               break;
+       default:
+               mlx5_core_warn(dev,
+                              "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
+                              cap_type, cap_mode);
+               err = -EINVAL;
+               break;
+       }
 query_ex:
        kfree(out);
        return err;
@@ -409,49 +404,47 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
        void *set_ctx = NULL;
        struct mlx5_profile *prof = dev->profile;
-       struct mlx5_caps *cur_caps = NULL;
-       struct mlx5_caps *max_caps = NULL;
        int err = -ENOMEM;
        int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+       void *set_hca_cap;
 
        set_ctx = kzalloc(set_sz, GFP_KERNEL);
        if (!set_ctx)
                goto query_ex;
 
-       max_caps = kzalloc(sizeof(*max_caps), GFP_KERNEL);
-       if (!max_caps)
-               goto query_ex;
-
-       cur_caps = kzalloc(sizeof(*cur_caps), GFP_KERNEL);
-       if (!cur_caps)
-               goto query_ex;
-
-       err = mlx5_core_get_caps(dev, max_caps, HCA_CAP_OPMOD_GET_MAX);
+       err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
        if (err)
                goto query_ex;
 
-       err = mlx5_core_get_caps(dev, cur_caps, HCA_CAP_OPMOD_GET_CUR);
+       err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
        if (err)
                goto query_ex;
 
+       set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
+                                  capability);
+       memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
+              MLX5_ST_SZ_BYTES(cmd_hca_cap));
+
+       mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
+                     mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
+                     128);
        /* we limit the size of the pkey table to 128 entries for now */
-       cur_caps->gen.pkey_table_size = 128;
+       MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
+                to_fw_pkey_sz(128));
 
        if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
-               cur_caps->gen.log_max_qp = prof->log_max_qp;
+               MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
+                        prof->log_max_qp);
 
-       /* disable checksum */
-       cur_caps->gen.flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+       /* disable cmdif checksum */
+       MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
+
+       MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
 
-       copy_rw_fields(MLX5_ADDR_OF(set_hca_cap_in, set_ctx, hca_capability_struct),
-                      cur_caps);
        err = set_caps(dev, set_ctx, set_sz);
 
 query_ex:
-       kfree(cur_caps);
-       kfree(max_caps);
        kfree(set_ctx);
-
        return err;
 }
 
@@ -507,7 +500,76 @@ static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
        return 0;
 }
 
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
+/* Hint that the IRQ of comp vector i (MSI-X entry i + MLX5_EQ_VEC_COMP_BASE)
+ * run on the CPU picked by cpumask_local_spread(i, priv->numa_node). On
+ * success the mask stays allocated for mlx5_irq_clear_affinity_hint() to
+ * free. Returns 0, -ENOMEM, or the irq_set_affinity_hint() error. */
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       struct msix_entry *msix = priv->msix_arr;
+       int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;
+       int err;
+
+       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed\n");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       priv->irq_info[i].mask);
+
+       err = irq_set_affinity_hint(irq, priv->irq_info[i].mask);
+       if (err) {
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x\n",
+                              irq);
+               free_cpumask_var(priv->irq_info[i].mask);
+               return err;
+       }
+
+       return 0;
+}
+
+/* Drop the affinity hint of comp vector i's IRQ and free its cpumask. */
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *p = &mdev->priv;
+
+       irq_set_affinity_hint(p->msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector,
+                             NULL);
+       free_cpumask_var(p->irq_info[i].mask);
+}
+
+/* Set an affinity hint on every comp vector; if one fails, clear the hints
+ * already set (in reverse order) and return that error.
+ */
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int ret = 0;
+       int vec;
+
+       for (vec = 0; vec < mdev->priv.eq_table.num_comp_vectors; vec++) {
+               ret = mlx5_irq_set_affinity_hint(mdev, vec);
+               if (ret)
+                       break;
+       }
+
+       while (ret && --vec >= 0)
+               mlx5_irq_clear_affinity_hint(mdev, vec);
+
+       return ret;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int vec = 0;    /* comp vector whose hint is cleared next */
+
+       while (vec < mdev->priv.eq_table.num_comp_vectors)
+               mlx5_irq_clear_affinity_hint(mdev, vec++);
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+                   unsigned int *irqn)
 {
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        struct mlx5_eq *eq, *n;
@@ -549,7 +611,7 @@ static void free_comp_eqs(struct mlx5_core_dev *dev)
 static int alloc_comp_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = &dev->priv.eq_table;
-       char name[MLX5_MAX_EQ_NAME];
+       char name[MLX5_MAX_IRQ_NAME];
        struct mlx5_eq *eq;
        int ncomp_vec;
        int nent;
@@ -566,7 +628,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev)
                        goto clean;
                }
 
-               snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
                err = mlx5_create_map_eq(dev, eq,
                                         i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
                                         name, &dev->priv.uuari.uars[0]);
@@ -588,12 +650,197 @@ clean:
        return err;
 }
 
-static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
+#ifdef CONFIG_MLX5_CORE_EN
+static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
-       struct mlx5_priv *priv = &dev->priv;
+       u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
+       u32 query_out[MLX5_ST_SZ_DW(query_issi_out)];
+       u32 set_in[MLX5_ST_SZ_DW(set_issi_in)];
+       u32 set_out[MLX5_ST_SZ_DW(set_issi_out)];
        int err;
+       u32 sup_issi;
+       /* Query the supported ISSI bits and select ISSI 1 when offered. */
+       memset(query_in, 0, sizeof(query_in));
+       memset(query_out, 0, sizeof(query_out));
+
+       MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
+
+       err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in),
+                                        query_out, sizeof(query_out));
+       if (err) {
+               if (((struct mlx5_outbox_hdr *)query_out)->status ==
+                   MLX5_CMD_STAT_BAD_OP_ERR) {
+                       pr_debug("Only ISSI 0 is supported\n");
+                       return 0;       /* BAD_OP is not treated as failure */
+               }
+
+               pr_err("failed to query ISSI\n");
+               return err;
+       }
+
+       sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
+
+       if (sup_issi & (1 << 1)) {      /* bit 1 set: switch to ISSI 1 */
+               memset(set_in, 0, sizeof(set_in));
+               memset(set_out, 0, sizeof(set_out));
+
+               MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
+               MLX5_SET(set_issi_in, set_in, current_issi, 1);
+
+               err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in),
+                                                set_out, sizeof(set_out));
+               if (err) {
+                       pr_err("failed to set ISSI=1\n");
+                       return err;
+               }
+
+               dev->issi = 1;
+
+               return 0;
+       } else if (sup_issi & (1 << 0) || !sup_issi) {  /* nothing to set */
+               return 0;
+       }
+
+       return -ENOTSUPP;       /* only bits other than 0 and 1 are set */
+}
+#endif
+
+/* Map PCI resource 0 write-combining into priv.bf_mapping; -ENOMEM on failure */
+static int map_bf_area(struct mlx5_core_dev *dev)
+{
+       struct pci_dev *pdev = dev->pdev;
+
+       dev->priv.bf_mapping = io_mapping_create_wc(pci_resource_start(pdev, 0),
+                                                   pci_resource_len(pdev, 0));
+       return dev->priv.bf_mapping ? 0 : -ENOMEM;
+}
+
+static void unmap_bf_area(struct mlx5_core_dev *dev)
+{
+       if (dev->priv.bf_mapping)       /* NULL if map_bf_area() failed */
+               io_mapping_free(dev->priv.bf_mapping);
+}
+
+/* Call intf->add() for the device owning priv; if it returns a context,
+ * record the (intf, context) pair on priv->ctx_list under ctx_lock. */
+static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+       struct mlx5_device_context *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+       if (!ctx)
+               return;
+       ctx->intf = intf;
+       ctx->context = intf->add(dev);
+       if (!ctx->context) {
+               kfree(ctx);
+               return;
+       }
+
+       spin_lock_irq(&priv->ctx_lock);
+       list_add_tail(&ctx->list, &priv->ctx_list);
+       spin_unlock_irq(&priv->ctx_lock);
+}
+
+static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+       struct mlx5_device_context *dev_ctx;
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+       /* Unlink intf's context from this device, then remove and free it. */
+       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+               if (dev_ctx->intf == intf) {
+                       spin_lock_irq(&priv->ctx_lock);
+                       list_del(&dev_ctx->list);
+                       spin_unlock_irq(&priv->ctx_lock);
+                       /* ctx_lock is released before calling back into intf */
+                       intf->remove(dev, dev_ctx->context);
+                       kfree(dev_ctx);
+                       return; /* dev_ctx is freed; the walk must stop here */
+               }
+}
+
+/* Put the device on dev_list and offer it to every registered interface,
+ * all under intf_mutex. Always returns 0. */
+static int mlx5_register_device(struct mlx5_core_dev *dev)
+{
+       struct mlx5_interface *cur;
+
+       mutex_lock(&intf_mutex);
+       list_add_tail(&dev->priv.dev_list, &dev_list);
+       list_for_each_entry(cur, &intf_list, list)
+               mlx5_add_device(cur, &dev->priv);
+       mutex_unlock(&intf_mutex);
+       return 0;
+}
+
+/* Remove the device from every registered interface, then from dev_list. */
+static void mlx5_unregister_device(struct mlx5_core_dev *dev)
+{
+       struct mlx5_interface *cur;
+
+       mutex_lock(&intf_mutex);
+       list_for_each_entry(cur, &intf_list, list)
+               mlx5_remove_device(cur, &dev->priv);
+       list_del(&dev->priv.dev_list);
+       mutex_unlock(&intf_mutex);
+}
+
+/* Attach intf to every known device; -EINVAL if add or remove is missing. */
+int mlx5_register_interface(struct mlx5_interface *intf)
+{
+       struct mlx5_priv *cur;
+
+       if (!(intf->add && intf->remove))
+               return -EINVAL;
+
+       mutex_lock(&intf_mutex);
+       list_add_tail(&intf->list, &intf_list);
+       list_for_each_entry(cur, &dev_list, dev_list)
+               mlx5_add_device(intf, cur);
+       mutex_unlock(&intf_mutex);
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_register_interface);
+
+void mlx5_unregister_interface(struct mlx5_interface *intf)
+{
+       struct mlx5_priv *cur;  /* walks dev_list via its dev_list member */
+
+       mutex_lock(&intf_mutex);
+       list_for_each_entry(cur, &dev_list, dev_list)
+               mlx5_remove_device(intf, cur);
+       list_del(&intf->list);  /* after it was detached from every device */
+       mutex_unlock(&intf_mutex);
+}
+EXPORT_SYMBOL(mlx5_unregister_interface);
+
+/* Return get_dev(context) of the first ctx_list entry whose interface matches
+ * protocol and provides get_dev, or NULL if none does. The walk runs under
+ * ctx_lock taken with spin_lock_irqsave(). */
+void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
+{
+       struct mlx5_priv *priv = &mdev->priv;
+       struct mlx5_device_context *ctx;
+       void *found = NULL;
+       unsigned long irqflags;
+
+       spin_lock_irqsave(&priv->ctx_lock, irqflags);
+       list_for_each_entry(ctx, &priv->ctx_list, list) {
+               if (ctx->intf->protocol != protocol || !ctx->intf->get_dev)
+                       continue;
+               found = ctx->intf->get_dev(ctx->context);
+               break;
+       }
+       spin_unlock_irqrestore(&priv->ctx_lock, irqflags);
+       return found;
+}
+EXPORT_SYMBOL(mlx5_get_protocol_dev);
+
+static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+       struct pci_dev *pdev = dev->pdev;
+       int err = 0;
 
-       dev->pdev = pdev;
        pci_set_drvdata(dev->pdev, dev);
        strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
        priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
@@ -602,11 +849,15 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
        INIT_LIST_HEAD(&priv->pgdir_list);
        spin_lock_init(&priv->mkey_lock);
 
+       mutex_init(&priv->alloc_mutex);
+
+       priv->numa_node = dev_to_node(&dev->pdev->dev);
+
        priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
        if (!priv->dbg_root)
                return -ENOMEM;
 
-       err = pci_enable_device(pdev);
+       err = mlx5_pci_enable_device(dev);
        if (err) {
                dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
                goto err_dbg;
@@ -633,13 +884,61 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
                dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
                goto err_clr_master;
        }
+
+       return 0;
+
+err_clr_master:
+       pci_clear_master(dev->pdev);
+       release_bar(dev->pdev);
+err_disable:
+       mlx5_pci_disable_device(dev);
+
+err_dbg:
+       debugfs_remove(priv->dbg_root);
+       return err;
+}
+
+static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+       iounmap(dev->iseg);
+       pci_clear_master(dev->pdev);    /* rest mirrors mlx5_pci_init() unwind */
+       release_bar(dev->pdev);
+       mlx5_pci_disable_device(dev);
+       debugfs_remove(priv->dbg_root);
+}
+
+#define MLX5_IB_MOD "mlx5_ib"   /* requested via request_module_nowait() */
+static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+       struct pci_dev *pdev = dev->pdev;
+       int err;
+
+       mutex_lock(&dev->intf_state_mutex);
+       if (dev->interface_state == MLX5_INTERFACE_STATE_UP) {
+               dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
+                        __func__);
+               goto out;
+       }
+
        dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
                 fw_rev_min(dev), fw_rev_sub(dev));
 
+       /* on load removing any previous indication of internal error, device is
+        * up
+        */
+       dev->state = MLX5_DEVICE_STATE_UP;
+
        err = mlx5_cmd_init(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
-               goto err_unmap;
+               goto out_err;
+       }
+
+       err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+       if (err) {
+               dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
+                       FW_INIT_TIMEOUT_MILI);
+               goto out_err;
        }
 
        mlx5_pagealloc_init(dev);
@@ -650,6 +949,14 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
                goto err_pagealloc_cleanup;
        }
 
+#ifdef CONFIG_MLX5_CORE_EN
+       err = mlx5_core_set_issi(dev);
+       if (err) {
+               dev_err(&pdev->dev, "failed to set issi\n");
+               goto err_disable_hca;
+       }
+#endif
+
        err = mlx5_satisfy_startup_pages(dev, 1);
        if (err) {
                dev_err(&pdev->dev, "failed to allocate boot pages\n");
@@ -688,15 +995,15 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
 
        mlx5_start_health_poll(dev);
 
-       err = mlx5_cmd_query_hca_cap(dev, &dev->caps);
+       err = mlx5_query_hca_caps(dev);
        if (err) {
                dev_err(&pdev->dev, "query hca failed\n");
                goto err_stop_poll;
        }
 
-       err = mlx5_cmd_query_adapter(dev);
+       err = mlx5_query_board_id(dev);
        if (err) {
-               dev_err(&pdev->dev, "query adapter failed\n");
+               dev_err(&pdev->dev, "query board id failed\n");
                goto err_stop_poll;
        }
 
@@ -730,6 +1037,15 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
                goto err_stop_eqs;
        }
 
+       if (map_bf_area(dev))
+               dev_err(&pdev->dev, "Failed to map blue flame area\n");
+
+       err = mlx5_irq_set_affinity_hints(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+               goto err_unmap_bf_area;
+       }
+
        MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
 
        mlx5_init_cq_table(dev);
@@ -737,8 +1053,34 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
        mlx5_init_srq_table(dev);
        mlx5_init_mr_table(dev);
 
+       err = mlx5_register_device(dev);
+       if (err) {
+               dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
+               goto err_reg_dev;
+       }
+
+       err = request_module_nowait(MLX5_IB_MOD);
+       if (err)
+               pr_info("failed request module on %s\n", MLX5_IB_MOD);
+
+       dev->interface_state = MLX5_INTERFACE_STATE_UP;
+out:
+       mutex_unlock(&dev->intf_state_mutex);
+
        return 0;
 
+err_reg_dev:
+       mlx5_cleanup_mr_table(dev);
+       mlx5_cleanup_srq_table(dev);
+       mlx5_cleanup_qp_table(dev);
+       mlx5_cleanup_cq_table(dev);
+       mlx5_irq_clear_affinity_hints(dev);
+
+err_unmap_bf_area:
+       unmap_bf_area(dev);
+
+       free_comp_eqs(dev);
+
 err_stop_eqs:
        mlx5_stop_eqs(dev);
 
@@ -755,7 +1097,7 @@ err_stop_poll:
        mlx5_stop_health_poll(dev);
        if (mlx5_cmd_teardown_hca(dev)) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
-               return err;
+               goto out_err;
        }
 
 err_pagealloc_stop:
@@ -771,167 +1113,55 @@ err_pagealloc_cleanup:
        mlx5_pagealloc_cleanup(dev);
        mlx5_cmd_cleanup(dev);
 
-err_unmap:
-       iounmap(dev->iseg);
-
-err_clr_master:
-       pci_clear_master(dev->pdev);
-       release_bar(dev->pdev);
-
-err_disable:
-       pci_disable_device(dev->pdev);
+out_err:
+       dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+       mutex_unlock(&dev->intf_state_mutex);
 
-err_dbg:
-       debugfs_remove(priv->dbg_root);
        return err;
 }
 
-static void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
+static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 {
-       struct mlx5_priv *priv = &dev->priv;
+       int err = 0;
 
+       mutex_lock(&dev->intf_state_mutex);
+       if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) {
+               dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
+                        __func__);
+               goto out;
+       }
+       mlx5_unregister_device(dev);
+       mlx5_cleanup_mr_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cleanup_cq_table(dev);
+       mlx5_irq_clear_affinity_hints(dev);
+       unmap_bf_area(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_free_uuars(dev, &priv->uuari);
        mlx5_eq_cleanup(dev);
        mlx5_disable_msix(dev);
        mlx5_stop_health_poll(dev);
-       if (mlx5_cmd_teardown_hca(dev)) {
+       err = mlx5_cmd_teardown_hca(dev);
+       if (err) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
-               return;
+               goto out;
        }
        mlx5_pagealloc_stop(dev);
        mlx5_reclaim_startup_pages(dev);
        mlx5_core_disable_hca(dev);
        mlx5_pagealloc_cleanup(dev);
        mlx5_cmd_cleanup(dev);
-       iounmap(dev->iseg);
-       pci_clear_master(dev->pdev);
-       release_bar(dev->pdev);
-       pci_disable_device(dev->pdev);
-       debugfs_remove(priv->dbg_root);
-}
-
-static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
-{
-       struct mlx5_device_context *dev_ctx;
-       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-
-       dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
-       if (!dev_ctx) {
-               pr_warn("mlx5_add_device: alloc context failed\n");
-               return;
-       }
-
-       dev_ctx->intf    = intf;
-       dev_ctx->context = intf->add(dev);
-
-       if (dev_ctx->context) {
-               spin_lock_irq(&priv->ctx_lock);
-               list_add_tail(&dev_ctx->list, &priv->ctx_list);
-               spin_unlock_irq(&priv->ctx_lock);
-       } else {
-               kfree(dev_ctx);
-       }
-}
-
-static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
-{
-       struct mlx5_device_context *dev_ctx;
-       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-
-       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-               if (dev_ctx->intf == intf) {
-                       spin_lock_irq(&priv->ctx_lock);
-                       list_del(&dev_ctx->list);
-                       spin_unlock_irq(&priv->ctx_lock);
-
-                       intf->remove(dev, dev_ctx->context);
-                       kfree(dev_ctx);
-                       return;
-               }
-}
-static int mlx5_register_device(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_interface *intf;
-
-       mutex_lock(&intf_mutex);
-       list_add_tail(&priv->dev_list, &dev_list);
-       list_for_each_entry(intf, &intf_list, list)
-               mlx5_add_device(intf, priv);
-       mutex_unlock(&intf_mutex);
 
-       return 0;
-}
-static void mlx5_unregister_device(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_interface *intf;
-
-       mutex_lock(&intf_mutex);
-       list_for_each_entry(intf, &intf_list, list)
-               mlx5_remove_device(intf, priv);
-       list_del(&priv->dev_list);
-       mutex_unlock(&intf_mutex);
-}
-
-int mlx5_register_interface(struct mlx5_interface *intf)
-{
-       struct mlx5_priv *priv;
-
-       if (!intf->add || !intf->remove)
-               return -EINVAL;
-
-       mutex_lock(&intf_mutex);
-       list_add_tail(&intf->list, &intf_list);
-       list_for_each_entry(priv, &dev_list, dev_list)
-               mlx5_add_device(intf, priv);
-       mutex_unlock(&intf_mutex);
-
-       return 0;
-}
-EXPORT_SYMBOL(mlx5_register_interface);
-
-void mlx5_unregister_interface(struct mlx5_interface *intf)
-{
-       struct mlx5_priv *priv;
-
-       mutex_lock(&intf_mutex);
-       list_for_each_entry(priv, &dev_list, dev_list)
-              mlx5_remove_device(intf, priv);
-       list_del(&intf->list);
-       mutex_unlock(&intf_mutex);
-}
-EXPORT_SYMBOL(mlx5_unregister_interface);
-
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
-       struct mlx5_priv *priv = &mdev->priv;
-       struct mlx5_device_context *dev_ctx;
-       unsigned long flags;
-       void *result = NULL;
-
-       spin_lock_irqsave(&priv->ctx_lock, flags);
-
-       list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
-               if ((dev_ctx->intf->protocol == protocol) &&
-                   dev_ctx->intf->get_dev) {
-                       result = dev_ctx->intf->get_dev(dev_ctx->context);
-                       break;
-               }
-
-       spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
-       return result;
+out:
+       dev->interface_state = MLX5_INTERFACE_STATE_DOWN;
+       mutex_unlock(&dev->intf_state_mutex);
+       return err;
 }
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
 
-static void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-                           unsigned long param)
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+                    unsigned long param)
 {
        struct mlx5_priv *priv = &dev->priv;
        struct mlx5_device_context *dev_ctx;
@@ -952,7 +1182,6 @@ struct mlx5_core_event_handler {
                      void *data);
 };
 
-#define MLX5_IB_MOD "mlx5_ib"
 
 static int init_one(struct pci_dev *pdev,
                    const struct pci_device_id *id)
@@ -976,43 +1205,166 @@ static int init_one(struct pci_dev *pdev,
                prof_sel = MLX5_DEFAULT_PROF;
        }
        dev->profile = &profile[prof_sel];
+       dev->pdev = pdev;
        dev->event = mlx5_core_event;
 
        INIT_LIST_HEAD(&priv->ctx_list);
        spin_lock_init(&priv->ctx_lock);
-       err = mlx5_dev_init(dev, pdev);
+       mutex_init(&dev->pci_status_mutex);
+       mutex_init(&dev->intf_state_mutex);
+       err = mlx5_pci_init(dev, priv);
        if (err) {
-               dev_err(&pdev->dev, "mlx5_dev_init failed %d\n", err);
-               goto out;
+               dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
+               goto clean_dev;
        }
 
-       err = mlx5_register_device(dev);
+       err = mlx5_health_init(dev);
        if (err) {
-               dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
-               goto out_init;
+               dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
+               goto close_pci;
        }
 
-       err = request_module_nowait(MLX5_IB_MOD);
-       if (err)
-               pr_info("failed request module on %s\n", MLX5_IB_MOD);
+       err = mlx5_load_one(dev, priv);
+       if (err) {
+               dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
+               goto clean_health;
+       }
 
        return 0;
 
-out_init:
-       mlx5_dev_cleanup(dev);
-out:
+clean_health:
+       mlx5_health_cleanup(dev);
+close_pci:
+       mlx5_pci_close(dev, priv);
+clean_dev:
+       pci_set_drvdata(pdev, NULL);
        kfree(dev);
+
        return err;
 }
+
 static void remove_one(struct pci_dev *pdev)
 {
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+       struct mlx5_priv *priv = &dev->priv;
 
-       mlx5_unregister_device(dev);
-       mlx5_dev_cleanup(dev);
+       if (mlx5_unload_one(dev, priv)) {
+               dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
+               mlx5_health_cleanup(dev);
+               return;
+       }
+       mlx5_health_cleanup(dev);
+       mlx5_pci_close(dev, priv);
+       pci_set_drvdata(pdev, NULL);
        kfree(dev);
 }
 
+static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+                                             pci_channel_state_t state)
+{
+       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+       struct mlx5_priv *priv = &dev->priv;
+       /* Teardown runs in order; mlx5_unload_one()'s result is ignored here. */
+       dev_info(&pdev->dev, "%s was called\n", __func__);
+       mlx5_enter_error_state(dev);
+       mlx5_unload_one(dev, priv);
+       mlx5_pci_disable_device(dev);
+       return state == pci_channel_io_perm_failure ?
+               PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+/* PCI slot_reset handler: re-enable the device and restore its PCI state. */
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+{
+       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+       int err;
+
+       dev_info(&pdev->dev, "%s was called\n", __func__);
+
+       err = mlx5_pci_enable_device(dev);
+       if (err) {
+               dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", __func__, err);
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+       pci_set_master(pdev);
+       pci_set_power_state(pdev, PCI_D0);
+       pci_restore_state(pdev);
+       /* Enabling succeeded, so err is 0 here: always report recovery. */
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+       mlx5_pci_err_detected(dev->pdev, 0); /* reuse the PCI error path; result unused */
+}
+
+/* Wait for vital signs after reset: poll (niter tries, 50 ms apart) until
+ * the PCI device ID reads back, then until the health counter is neither
+ * 0 nor 0xffffffff. Only warns on timeout; the caller is not told.
+ */
+static void wait_vital(struct pci_dev *pdev)
+{
+       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+       struct mlx5_core_health *health = &dev->priv.health;
+       const int niter = 100;
+       u32 count;
+       u16 did;
+       int i;
+
+       /* Wait for firmware to be ready after reset */
+       msleep(1000);
+       for (i = 0; i < niter; i++) {
+               if (pci_read_config_word(pdev, 2, &did)) {
+                       dev_warn(&pdev->dev, "failed reading config word\n");
+                       break;
+               }
+               if (did == pdev->device) {
+                       dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
+                       break;
+               }
+               msleep(50);
+       }
+       if (i == niter)
+               dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);
+       /* Second stage: wait for the health counter to hold a plausible value */
+       for (i = 0; i < niter; i++) {
+               count = ioread32be(health->health_counter);
+               if (count && count != 0xffffffff) {
+                       dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+                       break;
+               }
+               msleep(50);
+       }
+
+       if (i == niter)
+               dev_warn(&pdev->dev, "%s-%d: could not read health counter\n", __func__, __LINE__);
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+       struct mlx5_priv *priv = &dev->priv;
+       int err;
+       /* Reloads the interface after wait_vital(); the outcome is only logged. */
+       dev_info(&pdev->dev, "%s was called\n", __func__);
+
+       pci_save_state(pdev);
+       wait_vital(pdev);
+
+       err = mlx5_load_one(dev, priv);
+       if (err)
+               dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
+                       , __func__, err);
+       else
+               dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+}
+
+static const struct pci_error_handlers mlx5_err_handler = { /* used as mlx5_core_driver.err_handler */
+       .error_detected = mlx5_pci_err_detected,
+       .slot_reset     = mlx5_pci_slot_reset,
+       .resume         = mlx5_pci_resume
+};
+
 static const struct pci_device_id mlx5_core_pci_table[] = {
        { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */
        { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */
@@ -1029,7 +1381,8 @@ static struct pci_driver mlx5_core_driver = {
        .name           = DRIVER_NAME,
        .id_table       = mlx5_core_pci_table,
        .probe          = init_one,
-       .remove         = remove_one
+       .remove         = remove_one,
+       .err_handler    = &mlx5_err_handler
 };
 
 static int __init init(void)
@@ -1037,22 +1390,17 @@ static int __init init(void)
        int err;
 
        mlx5_register_debugfs();
-       mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq");
-       if (!mlx5_core_wq) {
-               err = -ENOMEM;
-               goto err_debug;
-       }
-       mlx5_health_init();
 
        err = pci_register_driver(&mlx5_core_driver);
        if (err)
-               goto err_health;
+               goto err_debug;
+
+#ifdef CONFIG_MLX5_CORE_EN
+       mlx5e_init();
+#endif
 
        return 0;
 
-err_health:
-       mlx5_health_cleanup();
-       destroy_workqueue(mlx5_core_wq);
 err_debug:
        mlx5_unregister_debugfs();
        return err;
@@ -1060,9 +1408,10 @@ err_debug:
 
 static void __exit cleanup(void)
 {
+#ifdef CONFIG_MLX5_CORE_EN
+       mlx5e_cleanup();
+#endif
        pci_unregister_driver(&mlx5_core_driver);
-       mlx5_health_cleanup();
-       destroy_workqueue(mlx5_core_wq);
        mlx5_unregister_debugfs();
 }