These changes are the raw update to linux-4.4.6-rt14. Kernel sources
diff --git a/kernel/drivers/infiniband/core/verbs.c b/kernel/drivers/infiniband/core/verbs.c
index f93eb8d..545906d 100644
--- a/kernel/drivers/infiniband/core/verbs.c
+++ b/kernel/drivers/infiniband/core/verbs.c
@@ -41,6 +41,9 @@
 #include <linux/export.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/addrconf.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 
 #include "core_priv.h"
 
+static const char * const ib_events[] = {
+       [IB_EVENT_CQ_ERR]               = "CQ error",
+       [IB_EVENT_QP_FATAL]             = "QP fatal error",
+       [IB_EVENT_QP_REQ_ERR]           = "QP request error",
+       [IB_EVENT_QP_ACCESS_ERR]        = "QP access error",
+       [IB_EVENT_COMM_EST]             = "communication established",
+       [IB_EVENT_SQ_DRAINED]           = "send queue drained",
+       [IB_EVENT_PATH_MIG]             = "path migration successful",
+       [IB_EVENT_PATH_MIG_ERR]         = "path migration error",
+       [IB_EVENT_DEVICE_FATAL]         = "device fatal error",
+       [IB_EVENT_PORT_ACTIVE]          = "port active",
+       [IB_EVENT_PORT_ERR]             = "port error",
+       [IB_EVENT_LID_CHANGE]           = "LID change",
+       [IB_EVENT_PKEY_CHANGE]          = "P_key change",
+       [IB_EVENT_SM_CHANGE]            = "SM change",
+       [IB_EVENT_SRQ_ERR]              = "SRQ error",
+       [IB_EVENT_SRQ_LIMIT_REACHED]    = "SRQ limit reached",
+       [IB_EVENT_QP_LAST_WQE_REACHED]  = "last WQE reached",
+       [IB_EVENT_CLIENT_REREGISTER]    = "client reregister",
+       [IB_EVENT_GID_CHANGE]           = "GID changed",
+};
+
+const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
+{
+       size_t index = event;
+
+       return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ?
+                       ib_events[index] : "unrecognized event";
+}
+EXPORT_SYMBOL(ib_event_msg);
+
+static const char * const wc_statuses[] = {
+       [IB_WC_SUCCESS]                 = "success",
+       [IB_WC_LOC_LEN_ERR]             = "local length error",
+       [IB_WC_LOC_QP_OP_ERR]           = "local QP operation error",
+       [IB_WC_LOC_EEC_OP_ERR]          = "local EE context operation error",
+       [IB_WC_LOC_PROT_ERR]            = "local protection error",
+       [IB_WC_WR_FLUSH_ERR]            = "WR flushed",
+       [IB_WC_MW_BIND_ERR]             = "memory management operation error",
+       [IB_WC_BAD_RESP_ERR]            = "bad response error",
+       [IB_WC_LOC_ACCESS_ERR]          = "local access error",
+       [IB_WC_REM_INV_REQ_ERR]         = "invalid request error",
+       [IB_WC_REM_ACCESS_ERR]          = "remote access error",
+       [IB_WC_REM_OP_ERR]              = "remote operation error",
+       [IB_WC_RETRY_EXC_ERR]           = "transport retry counter exceeded",
+       [IB_WC_RNR_RETRY_EXC_ERR]       = "RNR retry counter exceeded",
+       [IB_WC_LOC_RDD_VIOL_ERR]        = "local RDD violation error",
+       [IB_WC_REM_INV_RD_REQ_ERR]      = "remote invalid RD request",
+       [IB_WC_REM_ABORT_ERR]           = "operation aborted",
+       [IB_WC_INV_EECN_ERR]            = "invalid EE context number",
+       [IB_WC_INV_EEC_STATE_ERR]       = "invalid EE context state",
+       [IB_WC_FATAL_ERR]               = "fatal error",
+       [IB_WC_RESP_TIMEOUT_ERR]        = "response timeout error",
+       [IB_WC_GENERAL_ERR]             = "general error",
+};
+
+const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
+{
+       size_t index = status;
+
+       return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ?
+                       wc_statuses[index] : "unrecognized status";
+}
+EXPORT_SYMBOL(ib_wc_status_msg);
+
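
The two helpers added above only translate enum values into strings, which consumers typically feed straight into log messages. A minimal sketch of that usage, assuming the declarations from rdma/ib_verbs.h; my_event_handler and my_log_completion are hypothetical names, with the former registered via ib_register_event_handler():

/* Hypothetical async event handler. */
static void my_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	pr_warn("IB async event on %s: %s (%d)\n", event->device->name,
		ib_event_msg(event->event), event->event);
}

/* Hypothetical logging of a work completion returned by ib_poll_cq(). */
static void my_log_completion(const struct ib_wc *wc)
{
	if (wc->status != IB_WC_SUCCESS)
		pr_err("wr_id %llu failed: %s (%d)\n",
		       (unsigned long long)wc->wr_id,
		       ib_wc_status_msg(wc->status), wc->status);
}
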
 __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
 {
        switch (rate) {
@@ -148,28 +216,79 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
 
 /* Protection domains */
 
+/**
+ * ib_alloc_pd - Allocates an unused protection domain.
+ * @device: The device on which to allocate the protection domain.
+ *
+ * A protection domain object provides an association between QPs, shared
+ * receive queues, address handles, memory regions, and memory windows.
+ *
+ * Every PD has a local_dma_lkey which can be used as the lkey value for local
+ * memory operations.
+ */
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
 {
        struct ib_pd *pd;
+       struct ib_device_attr devattr;
+       int rc;
+
+       rc = ib_query_device(device, &devattr);
+       if (rc)
+               return ERR_PTR(rc);
 
        pd = device->alloc_pd(device, NULL, NULL);
+       if (IS_ERR(pd))
+               return pd;
+
+       pd->device = device;
+       pd->uobject = NULL;
+       pd->local_mr = NULL;
+       atomic_set(&pd->usecnt, 0);
+
+       if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+               pd->local_dma_lkey = device->local_dma_lkey;
+       else {
+               struct ib_mr *mr;
+
+               mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
+               if (IS_ERR(mr)) {
+                       ib_dealloc_pd(pd);
+                       return (struct ib_pd *)mr;
+               }
 
-       if (!IS_ERR(pd)) {
-               pd->device  = device;
-               pd->uobject = NULL;
-               atomic_set(&pd->usecnt, 0);
+               pd->local_mr = mr;
+               pd->local_dma_lkey = pd->local_mr->lkey;
        }
-
        return pd;
 }
 EXPORT_SYMBOL(ib_alloc_pd);
 
-int ib_dealloc_pd(struct ib_pd *pd)
+/**
+ * ib_dealloc_pd - Deallocates a protection domain.
+ * @pd: The protection domain to deallocate.
+ *
+ * It is an error to call this function while any resources in the pd still
+ * exist.  The caller is responsible to synchronously destroy them and
+ * guarantee no new allocations will happen.
+ */
+void ib_dealloc_pd(struct ib_pd *pd)
 {
-       if (atomic_read(&pd->usecnt))
-               return -EBUSY;
+       int ret;
 
-       return pd->device->dealloc_pd(pd);
+       if (pd->local_mr) {
+               ret = ib_dereg_mr(pd->local_mr);
+               WARN_ON(ret);
+               pd->local_mr = NULL;
+       }
+
+       /* uverbs manipulates usecnt with proper locking, while the kabi
+          requires the caller to guarantee we can't race here. */
+       WARN_ON(atomic_read(&pd->usecnt));
+
+       /* Making dealloc_pd a void return is a WIP, no driver should return
+          an error here. */
+       ret = pd->device->dealloc_pd(pd);
+       WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
 }
 EXPORT_SYMBOL(ib_dealloc_pd);
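
After this change every PD carries a usable local_dma_lkey (either the device-wide lkey or one backed by an implicitly allocated local MR), and ib_dealloc_pd() no longer returns a value the caller could act on. A minimal caller sketch under those assumptions; my_setup_pd and its arguments are hypothetical and error handling is trimmed:

/* Hypothetical PD setup/teardown helper. */
static int my_setup_pd(struct ib_device *device, u64 dma_addr, u32 len)
{
	struct ib_pd *pd;
	struct ib_sge sge;

	pd = ib_alloc_pd(device);
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	/* The PD-wide lkey can now be used directly for local buffers. */
	sge.addr   = dma_addr;
	sge.length = len;
	sge.lkey   = pd->local_dma_lkey;

	/* ... create CQs/QPs/MRs against pd and post work requests ... */

	/* Destroy all PD resources first; ib_dealloc_pd() is now void. */
	ib_dealloc_pd(pd);
	return 0;
}
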
 
@@ -192,32 +311,69 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
-                      struct ib_grh *grh, struct ib_ah_attr *ah_attr)
+struct find_gid_index_context {
+       u16 vlan_id;
+};
+
+static bool find_gid_index(const union ib_gid *gid,
+                          const struct ib_gid_attr *gid_attr,
+                          void *context)
+{
+       struct find_gid_index_context *ctx =
+               (struct find_gid_index_context *)context;
+
+       if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
+           (is_vlan_dev(gid_attr->ndev) &&
+            vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+               return false;
+
+       return true;
+}
+
+static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
+                                  u16 vlan_id, const union ib_gid *sgid,
+                                  u16 *gid_index)
+{
+       struct find_gid_index_context context = {.vlan_id = vlan_id};
+
+       return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+                                    &context, gid_index);
+}
+
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
+                      const struct ib_wc *wc, const struct ib_grh *grh,
+                      struct ib_ah_attr *ah_attr)
 {
        u32 flow_class;
        u16 gid_index;
        int ret;
-       int is_eth = (rdma_port_get_link_layer(device, port_num) ==
-                       IB_LINK_LAYER_ETHERNET);
 
        memset(ah_attr, 0, sizeof *ah_attr);
-       if (is_eth) {
+       if (rdma_cap_eth_ah(device, port_num)) {
+               u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
+                               wc->vlan_id : 0xffff;
+
                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;
 
-               if (wc->wc_flags & IB_WC_WITH_SMAC &&
-                   wc->wc_flags & IB_WC_WITH_VLAN) {
-                       memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
-                       ah_attr->vlan_id = wc->vlan_id;
-               } else {
+               if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
+                   !(wc->wc_flags & IB_WC_WITH_VLAN)) {
                        ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
-                                       ah_attr->dmac, &ah_attr->vlan_id);
+                                                        ah_attr->dmac,
+                                                        wc->wc_flags & IB_WC_WITH_VLAN ?
+                                                        NULL : &vlan_id,
+                                                        0);
                        if (ret)
                                return ret;
                }
-       } else {
-               ah_attr->vlan_id = 0xffff;
+
+               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+                                             &grh->dgid, &gid_index);
+               if (ret)
+                       return ret;
+
+               if (wc->wc_flags & IB_WC_WITH_SMAC)
+                       memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
        }
 
        ah_attr->dlid = wc->slid;
@@ -229,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = grh->sgid;
 
-               ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
-                                        &gid_index);
-               if (ret)
-                       return ret;
+               if (!rdma_cap_eth_ah(device, port_num)) {
+                       ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+                                                        port_num, NULL,
+                                                        &gid_index);
+                       if (ret)
+                               return ret;
+               }
 
                ah_attr->grh.sgid_index = (u8) gid_index;
                flow_class = be32_to_cpu(grh->version_tclass_flow);
@@ -244,8 +403,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
 }
 EXPORT_SYMBOL(ib_init_ah_from_wc);
 
-struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
-                                  struct ib_grh *grh, u8 port_num)
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
+                                  const struct ib_grh *grh, u8 port_num)
 {
        struct ib_ah_attr ah_attr;
        int ret;
@@ -502,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp);
 static const struct {
        int                     valid;
        enum ib_qp_attr_mask    req_param[IB_QPT_MAX];
-       enum ib_qp_attr_mask    req_param_add_eth[IB_QPT_MAX];
        enum ib_qp_attr_mask    opt_param[IB_QPT_MAX];
-       enum ib_qp_attr_mask    opt_param_add_eth[IB_QPT_MAX];
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = {
                [IB_QPS_RESET] = { .valid = 1 },
@@ -585,12 +742,6 @@ static const struct {
                                                IB_QP_MAX_DEST_RD_ATOMIC        |
                                                IB_QP_MIN_RNR_TIMER),
                        },
-                       .req_param_add_eth = {
-                               [IB_QPT_RC]  = (IB_QP_SMAC),
-                               [IB_QPT_UC]  = (IB_QP_SMAC),
-                               [IB_QPT_XRC_INI]  = (IB_QP_SMAC),
-                               [IB_QPT_XRC_TGT]  = (IB_QP_SMAC)
-                       },
                        .opt_param = {
                                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX               |
                                                 IB_QP_QKEY),
@@ -611,21 +762,7 @@ static const struct {
                                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX               |
                                                 IB_QP_QKEY),
                         },
-                       .opt_param_add_eth = {
-                               [IB_QPT_RC]  = (IB_QP_ALT_SMAC                  |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID),
-                               [IB_QPT_UC]  = (IB_QP_ALT_SMAC                  |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID),
-                               [IB_QPT_XRC_INI]  = (IB_QP_ALT_SMAC                     |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID),
-                               [IB_QPT_XRC_TGT]  = (IB_QP_ALT_SMAC                     |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID)
-                       }
-               }
+               },
        },
        [IB_QPS_RTR]   = {
                [IB_QPS_RESET] = { .valid = 1 },
@@ -847,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
        req_param = qp_state_table[cur_state][next_state].req_param[type];
        opt_param = qp_state_table[cur_state][next_state].opt_param[type];
 
-       if (ll == IB_LINK_LAYER_ETHERNET) {
-               req_param |= qp_state_table[cur_state][next_state].
-                       req_param_add_eth[type];
-               opt_param |= qp_state_table[cur_state][next_state].
-                       opt_param_add_eth[type];
-       }
-
        if ((mask & req_param) != req_param)
                return 0;
 
@@ -864,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
 }
 EXPORT_SYMBOL(ib_modify_qp_is_ok);
 
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
-                           struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+                       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
 {
        int           ret = 0;
-       union ib_gid  sgid;
 
-       if ((*qp_attr_mask & IB_QP_AV)  &&
-           (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) {
-               ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
-                                  qp_attr->ah_attr.grh.sgid_index, &sgid);
-               if (ret)
-                       goto out;
+       if (*qp_attr_mask & IB_QP_AV) {
+               if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
+                   qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
+                       return -EINVAL;
+
+               if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
+                       return 0;
+
                if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
-                       rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
-                       rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
-                       if (!(*qp_attr_mask & IB_QP_VID))
-                               qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+                       rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
+                                       qp_attr->ah_attr.dmac);
                } else {
-                       ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
-                                       qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
-                       if (ret)
-                               goto out;
-                       ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
-                       if (ret)
+                       union ib_gid            sgid;
+                       struct ib_gid_attr      sgid_attr;
+                       int                     ifindex;
+
+                       ret = ib_query_gid(qp->device,
+                                          qp_attr->ah_attr.port_num,
+                                          qp_attr->ah_attr.grh.sgid_index,
+                                          &sgid, &sgid_attr);
+
+                       if (ret || !sgid_attr.ndev) {
+                               if (!ret)
+                                       ret = -ENXIO;
                                goto out;
+                       }
+
+                       ifindex = sgid_attr.ndev->ifindex;
+
+                       ret = rdma_addr_find_dmac_by_grh(&sgid,
+                                                        &qp_attr->ah_attr.grh.dgid,
+                                                        qp_attr->ah_attr.dmac,
+                                                        NULL, ifindex);
+
+                       dev_put(sgid_attr.ndev);
                }
-               *qp_attr_mask |= IB_QP_SMAC;
-               if (qp_attr->vlan_id < 0xFFFF)
-                       *qp_attr_mask |= IB_QP_VID;
        }
 out:
        return ret;
 }
-EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
+EXPORT_SYMBOL(ib_resolve_eth_dmac);
 
 
 int ib_modify_qp(struct ib_qp *qp,
@@ -906,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp,
 {
        int ret;
 
-       ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
+       ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
        if (ret)
                return ret;
 
@@ -1012,11 +1154,12 @@ EXPORT_SYMBOL(ib_destroy_qp);
 struct ib_cq *ib_create_cq(struct ib_device *device,
                           ib_comp_handler comp_handler,
                           void (*event_handler)(struct ib_event *, void *),
-                          void *cq_context, int cqe, int comp_vector)
+                          void *cq_context,
+                          const struct ib_cq_init_attr *cq_attr)
 {
        struct ib_cq *cq;
 
-       cq = device->create_cq(device, cqe, comp_vector, NULL, NULL);
+       cq = device->create_cq(device, cq_attr, NULL, NULL);
 
        if (!IS_ERR(cq)) {
                cq->device        = device;
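
Callers now describe the CQ through struct ib_cq_init_attr rather than passing cqe and comp_vector directly. A hedged sketch of the updated call; my_create_cq, my_comp_handler and my_cq_event_handler are illustrative names and 256 is an arbitrary depth:

/* Hypothetical CQ creation using the new init-attr form. */
static struct ib_cq *my_create_cq(struct ib_device *device, void *ctx)
{
	struct ib_cq_init_attr cq_attr = {
		.cqe         = 256,	/* requested CQ depth */
		.comp_vector = 0,	/* completion vector to bind to */
	};

	return ib_create_cq(device, my_comp_handler, my_cq_event_handler,
			    ctx, &cq_attr);
}
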
@@ -1079,73 +1222,6 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
 }
 EXPORT_SYMBOL(ib_get_dma_mr);
 
-struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
-                            struct ib_phys_buf *phys_buf_array,
-                            int num_phys_buf,
-                            int mr_access_flags,
-                            u64 *iova_start)
-{
-       struct ib_mr *mr;
-       int err;
-
-       err = ib_check_mr_access(mr_access_flags);
-       if (err)
-               return ERR_PTR(err);
-
-       if (!pd->device->reg_phys_mr)
-               return ERR_PTR(-ENOSYS);
-
-       mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
-                                    mr_access_flags, iova_start);
-
-       if (!IS_ERR(mr)) {
-               mr->device  = pd->device;
-               mr->pd      = pd;
-               mr->uobject = NULL;
-               atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
-       }
-
-       return mr;
-}
-EXPORT_SYMBOL(ib_reg_phys_mr);
-
-int ib_rereg_phys_mr(struct ib_mr *mr,
-                    int mr_rereg_mask,
-                    struct ib_pd *pd,
-                    struct ib_phys_buf *phys_buf_array,
-                    int num_phys_buf,
-                    int mr_access_flags,
-                    u64 *iova_start)
-{
-       struct ib_pd *old_pd;
-       int ret;
-
-       ret = ib_check_mr_access(mr_access_flags);
-       if (ret)
-               return ret;
-
-       if (!mr->device->rereg_phys_mr)
-               return -ENOSYS;
-
-       if (atomic_read(&mr->usecnt))
-               return -EBUSY;
-
-       old_pd = mr->pd;
-
-       ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
-                                       phys_buf_array, num_phys_buf,
-                                       mr_access_flags, iova_start);
-
-       if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) {
-               atomic_dec(&old_pd->usecnt);
-               atomic_inc(&pd->usecnt);
-       }
-
-       return ret;
-}
-EXPORT_SYMBOL(ib_rereg_phys_mr);
-
 int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
 {
        return mr->device->query_mr ?
@@ -1170,54 +1246,28 @@ int ib_dereg_mr(struct ib_mr *mr)
 }
 EXPORT_SYMBOL(ib_dereg_mr);
 
-struct ib_mr *ib_create_mr(struct ib_pd *pd,
-                          struct ib_mr_init_attr *mr_init_attr)
-{
-       struct ib_mr *mr;
-
-       if (!pd->device->create_mr)
-               return ERR_PTR(-ENOSYS);
-
-       mr = pd->device->create_mr(pd, mr_init_attr);
-
-       if (!IS_ERR(mr)) {
-               mr->device  = pd->device;
-               mr->pd      = pd;
-               mr->uobject = NULL;
-               atomic_inc(&pd->usecnt);
-               atomic_set(&mr->usecnt, 0);
-       }
-
-       return mr;
-}
-EXPORT_SYMBOL(ib_create_mr);
-
-int ib_destroy_mr(struct ib_mr *mr)
-{
-       struct ib_pd *pd;
-       int ret;
-
-       if (atomic_read(&mr->usecnt))
-               return -EBUSY;
-
-       pd = mr->pd;
-       ret = mr->device->destroy_mr(mr);
-       if (!ret)
-               atomic_dec(&pd->usecnt);
-
-       return ret;
-}
-EXPORT_SYMBOL(ib_destroy_mr);
-
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+/**
+ * ib_alloc_mr() - Allocates a memory region
+ * @pd:            protection domain associated with the region
+ * @mr_type:       memory region type
+ * @max_num_sg:    maximum sg entries available for registration.
+ *
+ * Notes:
+ * Memory registration page/sg lists must not exceed max_num_sg.
+ * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
+ * max_num_sg * used_page_size.
+ *
+ */
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+                         enum ib_mr_type mr_type,
+                         u32 max_num_sg)
 {
        struct ib_mr *mr;
 
-       if (!pd->device->alloc_fast_reg_mr)
+       if (!pd->device->alloc_mr)
                return ERR_PTR(-ENOSYS);
 
-       mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
-
+       mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
        if (!IS_ERR(mr)) {
                mr->device  = pd->device;
                mr->pd      = pd;
@@ -1228,32 +1278,7 @@ struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
 
        return mr;
 }
-EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
-
-struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
-                                                         int max_page_list_len)
-{
-       struct ib_fast_reg_page_list *page_list;
-
-       if (!device->alloc_fast_reg_page_list)
-               return ERR_PTR(-ENOSYS);
-
-       page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
-
-       if (!IS_ERR(page_list)) {
-               page_list->device = device;
-               page_list->max_page_list_len = max_page_list_len;
-       }
-
-       return page_list;
-}
-EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
-
-void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
-       page_list->device->free_fast_reg_page_list(page_list);
-}
-EXPORT_SYMBOL(ib_free_fast_reg_page_list);
+EXPORT_SYMBOL(ib_alloc_mr);
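
ib_alloc_mr() folds the old ib_create_mr()/ib_alloc_fast_reg_mr() entry points into one typed allocator. A minimal sketch for a plain registration MR; my_alloc_reg_mr is hypothetical and the max_num_sg value of 32 is arbitrary:

/* Hypothetical allocation of a registration MR covering up to 32 SG entries. */
static struct ib_mr *my_alloc_reg_mr(struct ib_pd *pd)
{
	struct ib_mr *mr;

	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);
	if (IS_ERR(mr))
		pr_err("ib_alloc_mr failed: %ld\n", PTR_ERR(mr));

	return mr;
}
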
 
 /* Memory windows */
 
@@ -1446,3 +1471,111 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
                mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
 }
 EXPORT_SYMBOL(ib_check_mr_status);
+
+/**
+ * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
+ *     and set it as the memory region.
+ * @mr:            memory region
+ * @sg:            dma mapped scatterlist
+ * @sg_nents:      number of entries in sg
+ * @page_size:     page vector desired page size
+ *
+ * Constraints:
+ * - The first sg element is allowed to have an offset.
+ * - Each sg element must be aligned to page_size (or physically
+ *   contiguous to the previous element). In case an sg element has a
+ *   non contiguous offset, the mapping prefix will not include it.
+ * - The last sg element is allowed to have length less than page_size.
+ * - If sg_nents total byte length exceeds the mr max_num_sg * page_size
+ *   then only max_num_sg entries will be mapped.
+ *
+ * Returns the number of sg elements that were mapped to the memory region.
+ *
+ * After this completes successfully, the memory region
+ * is ready for registration.
+ */
+int ib_map_mr_sg(struct ib_mr *mr,
+                struct scatterlist *sg,
+                int sg_nents,
+                unsigned int page_size)
+{
+       if (unlikely(!mr->device->map_mr_sg))
+               return -ENOSYS;
+
+       mr->page_size = page_size;
+
+       return mr->device->map_mr_sg(mr, sg, sg_nents);
+}
+EXPORT_SYMBOL(ib_map_mr_sg);
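
A typical caller DMA-maps its scatterlist, maps the largest aligned prefix into the MR, and only then posts the registration work request that makes the MR usable. A hedged sketch of that sequence; my_map_buffer is hypothetical, PAGE_SIZE is just one possible page size, and DMA_TO_DEVICE is just one possible direction:

/* Hypothetical helper: DMA-map a buffer's sg list and map it into mr. */
static int my_map_buffer(struct ib_device *device, struct ib_mr *mr,
			 struct scatterlist *sgl, int sg_cnt)
{
	int nents, n;

	nents = ib_dma_map_sg(device, sgl, sg_cnt, DMA_TO_DEVICE);
	if (!nents)
		return -EIO;

	n = ib_map_mr_sg(mr, sgl, nents, PAGE_SIZE);
	if (n < nents) {
		/* Error (n < 0) or only an aligned prefix was mapped. */
		ib_dma_unmap_sg(device, sgl, sg_cnt, DMA_TO_DEVICE);
		return n < 0 ? n : -EINVAL;
	}

	/*
	 * The MR is now ready for the actual registration, which is done
	 * by posting a registration work request before its keys are used.
	 */
	return 0;
}
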
+
+/**
+ * ib_sg_to_pages() - Convert the largest prefix of a sg list
+ *     to a page vector
+ * @mr:            memory region
+ * @sgl:           dma mapped scatterlist
+ * @sg_nents:      number of entries in sg
+ * @set_page:      driver page assignment function pointer
+ *
+ * Core service helper for drivers to convert the largest
+ * prefix of the given sg list to a page vector. The sg list
+ * prefix converted is the prefix that meets the requirements
+ * of ib_map_mr_sg.
+ *
+ * Returns the number of sg elements that were assigned to
+ * a page vector.
+ */
+int ib_sg_to_pages(struct ib_mr *mr,
+                  struct scatterlist *sgl,
+                  int sg_nents,
+                  int (*set_page)(struct ib_mr *, u64))
+{
+       struct scatterlist *sg;
+       u64 last_end_dma_addr = 0, last_page_addr = 0;
+       unsigned int last_page_off = 0;
+       u64 page_mask = ~((u64)mr->page_size - 1);
+       int i, ret;
+
+       mr->iova = sg_dma_address(&sgl[0]);
+       mr->length = 0;
+
+       for_each_sg(sgl, sg, sg_nents, i) {
+               u64 dma_addr = sg_dma_address(sg);
+               unsigned int dma_len = sg_dma_len(sg);
+               u64 end_dma_addr = dma_addr + dma_len;
+               u64 page_addr = dma_addr & page_mask;
+
+               /*
+                * For the second and later elements, check whether either the
+                * end of element i-1 or the start of element i is not aligned
+                * on a page boundary.
+                */
+               if (i && (last_page_off != 0 || page_addr != dma_addr)) {
+                       /* Stop mapping if there is a gap. */
+                       if (last_end_dma_addr != dma_addr)
+                               break;
+
+                       /*
+                        * Coalesce this element with the last. If it is small
+                        * enough just update mr->length. Otherwise start
+                        * mapping from the next page.
+                        */
+                       goto next_page;
+               }
+
+               do {
+                       ret = set_page(mr, page_addr);
+                       if (unlikely(ret < 0))
+                               return i ? : ret;
+next_page:
+                       page_addr += mr->page_size;
+               } while (page_addr < end_dma_addr);
+
+               mr->length += dma_len;
+               last_end_dma_addr = end_dma_addr;
+               last_page_addr = end_dma_addr & page_mask;
+               last_page_off = end_dma_addr & ~page_mask;
+       }
+
+       return i;
+}
+EXPORT_SYMBOL(ib_sg_to_pages);
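
ib_sg_to_pages() is intended to be called from a driver's map_mr_sg hook with a callback that records each page address in the driver's own page vector. A hedged sketch of such a pairing; struct my_mr, my_set_page and my_map_mr_sg are hypothetical driver-side names:

/* Hypothetical driver-private MR wrapper. */
struct my_mr {
	struct ib_mr	ibmr;
	u64		*pages;		/* page-address vector */
	int		npages;
	int		max_pages;
};

/* Callback invoked by ib_sg_to_pages() once per page in the prefix. */
static int my_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct my_mr *mr = container_of(ibmr, struct my_mr, ibmr);

	if (mr->npages == mr->max_pages)
		return -ENOMEM;

	mr->pages[mr->npages++] = addr;
	return 0;
}

/* Driver's map_mr_sg hook built on the core helper. */
static int my_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			int sg_nents)
{
	struct my_mr *mr = container_of(ibmr, struct my_mr, ibmr);

	mr->npages = 0;
	return ib_sg_to_pages(ibmr, sg, sg_nents, my_set_page);
}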