These changes are the raw update of the kvmfornfv.git kernel sources to linux-4.4.6-rt14.
diff --git a/kernel/net/sunrpc/xprtrdma/svc_rdma_transport.c b/kernel/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f609c1c..b348b4a 100644
@@ -56,6 +56,7 @@
 
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
+static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
 static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
                                        struct net *net,
                                        struct sockaddr *sa, int salen,
@@ -95,16 +96,69 @@ struct svc_xprt_class svc_rdma_class = {
        .xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
+                                          struct sockaddr *, int, int);
+static void svc_rdma_bc_detach(struct svc_xprt *);
+static void svc_rdma_bc_free(struct svc_xprt *);
+
+static struct svc_xprt_ops svc_rdma_bc_ops = {
+       .xpo_create = svc_rdma_bc_create,
+       .xpo_detach = svc_rdma_bc_detach,
+       .xpo_free = svc_rdma_bc_free,
+       .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
+       .xpo_secure_port = svc_rdma_secure_port,
+};
+
+struct svc_xprt_class svc_rdma_bc_class = {
+       .xcl_name = "rdma-bc",
+       .xcl_owner = THIS_MODULE,
+       .xcl_ops = &svc_rdma_bc_ops,
+       .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
+};
+
+static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
+                                          struct net *net,
+                                          struct sockaddr *sa, int salen,
+                                          int flags)
+{
+       struct svcxprt_rdma *cma_xprt;
+       struct svc_xprt *xprt;
+
+       cma_xprt = rdma_create_xprt(serv, 0);
+       if (!cma_xprt)
+               return ERR_PTR(-ENOMEM);
+       xprt = &cma_xprt->sc_xprt;
+
+       svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
+       serv->sv_bc_xprt = xprt;
+
+       dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+       return xprt;
+}
+
+static void svc_rdma_bc_detach(struct svc_xprt *xprt)
+{
+       dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+}
+
+static void svc_rdma_bc_free(struct svc_xprt *xprt)
+{
+       struct svcxprt_rdma *rdma =
+               container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
+       dprintk("svcrdma: %s(%p)\n", __func__, xprt);
+       if (xprt)
+               kfree(rdma);
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
 struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
        struct svc_rdma_op_ctxt *ctxt;
 
-       while (1) {
-               ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
-               if (ctxt)
-                       break;
-               schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-       }
+       ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
+                               GFP_KERNEL | __GFP_NOFAIL);
        ctxt->xprt = xprt;
        INIT_LIST_HEAD(&ctxt->dto_q);
        ctxt->count = 0;
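
The bulk of this hunk adds a second transport class, "rdma-bc", for the server
side of NFSv4.x backchannel requests. Its xpo_create method only allocates the
svcxprt_rdma shell and wires up serv->sv_bc_xprt; no listener or QP is created,
since the backchannel rides on the forward channel's existing connection, and
the class's max payload is capped to what fits inline (1024 bytes minus the
RPC-over-RDMA header). The forward declaration of rdma_create_xprt() at the top
of the file is needed because svc_rdma_bc_create() sits above that function's
definition. As an illustrative sketch (not part of this patch), a caller could
instantiate the class by name once it has been registered:

    /* Illustrative only: assumes svc_rdma_bc_class has been registered
     * elsewhere via svc_reg_xprt_class(&svc_rdma_bc_class). */
    #include <linux/sunrpc/svc_xprt.h>

    static int setup_rdma_backchannel(struct svc_serv *serv, struct net *net)
    {
            /* svc_create_xprt() looks the class up by its xcl_name
             * ("rdma-bc") and invokes its xpo_create method, which
             * here is svc_rdma_bc_create(). */
            return svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0);
    }
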
@@ -156,12 +210,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 struct svc_rdma_req_map *svc_rdma_get_req_map(void)
 {
        struct svc_rdma_req_map *map;
-       while (1) {
-               map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
-               if (map)
-                       break;
-               schedule_timeout_uninterruptible(msecs_to_jiffies(500));
-       }
+       map = kmem_cache_alloc(svc_rdma_map_cachep,
+                              GFP_KERNEL | __GFP_NOFAIL);
        map->count = 0;
        return map;
 }
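
These two kmem_cache_alloc() conversions replace open-coded retry loops (sleep
500 ms, try again) with __GFP_NOFAIL, which makes the allocator itself retry
until it succeeds, so the callers may rely on a non-NULL return. The
svc_rdma_get_page() helper is removed further down on the same grounds. A
minimal sketch of the idiom, with a hypothetical cache name:

    #include <linux/slab.h>

    /* GFP_KERNEL | __GFP_NOFAIL may sleep and never returns NULL, so
     * neither a caller-side retry loop nor a NULL check is needed.
     * Only valid in process context. */
    static void *my_alloc_nofail(struct kmem_cache *my_cachep)
    {
            return kmem_cache_alloc(my_cachep, GFP_KERNEL | __GFP_NOFAIL);
    }
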
@@ -175,8 +225,8 @@ void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
 static void cq_event_handler(struct ib_event *event, void *context)
 {
        struct svc_xprt *xprt = context;
-       dprintk("svcrdma: received CQ event id=%d, context=%p\n",
-               event->event, context);
+       dprintk("svcrdma: received CQ event %s (%d), context=%p\n",
+               ib_event_msg(event->event), event->event, context);
        set_bit(XPT_CLOSE, &xprt->xpt_flags);
 }
 
@@ -191,8 +241,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
        case IB_EVENT_COMM_EST:
        case IB_EVENT_SQ_DRAINED:
        case IB_EVENT_QP_LAST_WQE_REACHED:
-               dprintk("svcrdma: QP event %d received for QP=%p\n",
-                       event->event, event->element.qp);
+               dprintk("svcrdma: QP event %s (%d) received for QP=%p\n",
+                       ib_event_msg(event->event), event->event,
+                       event->element.qp);
                break;
        /* These are considered fatal events */
        case IB_EVENT_PATH_MIG_ERR:
@@ -201,9 +252,10 @@ static void qp_event_handler(struct ib_event *event, void *context)
        case IB_EVENT_QP_ACCESS_ERR:
        case IB_EVENT_DEVICE_FATAL:
        default:
-               dprintk("svcrdma: QP ERROR event %d received for QP=%p, "
+               dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, "
                        "closing transport\n",
-                       event->event, event->element.qp);
+                       ib_event_msg(event->event), event->event,
+                       event->element.qp);
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
                break;
        }
@@ -402,7 +454,8 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
                for (i = 0; i < ret; i++) {
                        wc = &wc_a[i];
                        if (wc->status != IB_WC_SUCCESS) {
-                               dprintk("svcrdma: sq wc err status %d\n",
+                               dprintk("svcrdma: sq wc err status %s (%d)\n",
+                                       ib_wc_status_msg(wc->status),
                                        wc->status);
 
                                /* Close the transport */
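
Here and in the CM handlers below, each dprintk gains the string form of the
event alongside the raw enum, via ib_event_msg(), ib_wc_status_msg(), and
rdma_event_msg(), helpers available since Linux 4.2. They return a fixed
placeholder for out-of-range values, so the result is always safe to hand to a
printk-style format. A short sketch (my_log_wc is illustrative):

    #include <rdma/ib_verbs.h>

    static void my_log_wc(const struct ib_wc *wc)
    {
            /* "%s (%d)" keeps the numeric code for grepping while the
             * string makes the log readable at a glance. */
            pr_err("wc error: %s (%d)\n",
                   ib_wc_status_msg(wc->status), wc->status);
    }
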
@@ -490,18 +543,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        return cma_xprt;
 }
 
-struct page *svc_rdma_get_page(void)
-{
-       struct page *page;
-
-       while ((page = alloc_page(GFP_KERNEL)) == NULL) {
-               /* If we can't get memory, wait a bit and try again */
-               printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
-               schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
-       }
-       return page;
-}
-
 int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 {
        struct ib_recv_wr recv_wr, *bad_recv_wr;
@@ -520,7 +561,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                        pr_err("svcrdma: Too many sges (%d)\n", sge_no);
                        goto err_put_ctxt;
                }
-               page = svc_rdma_get_page();
+               page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
                ctxt->pages[sge_no] = page;
                pa = ib_dma_map_page(xprt->sc_cm_id->device,
                                     page, 0, PAGE_SIZE,
@@ -616,7 +657,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
        switch (event->event) {
        case RDMA_CM_EVENT_CONNECT_REQUEST:
                dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
-                       "event=%d\n", cma_id, cma_id->context, event->event);
+                       "event = %s (%d)\n", cma_id, cma_id->context,
+                       rdma_event_msg(event->event), event->event);
                handle_connect_req(cma_id,
                                   event->param.conn.initiator_depth);
                break;
@@ -636,7 +678,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 
        default:
                dprintk("svcrdma: Unexpected event on listening endpoint %p, "
-                       "event=%d\n", cma_id, event->event);
+                       "event = %s (%d)\n", cma_id,
+                       rdma_event_msg(event->event), event->event);
                break;
        }
 
@@ -669,15 +712,18 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
                break;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
-                       "event=%d\n", cma_id, xprt, event->event);
+                       "event = %s (%d)\n", cma_id, xprt,
+                       rdma_event_msg(event->event), event->event);
                if (xprt) {
                        set_bit(XPT_CLOSE, &xprt->xpt_flags);
                        svc_xprt_enqueue(xprt);
+                       svc_xprt_put(xprt);
                }
                break;
        default:
                dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
-                       "event=%d\n", cma_id, event->event);
+                       "event = %s (%d)\n", cma_id,
+                       rdma_event_msg(event->event), event->event);
                break;
        }
        return 0;
@@ -704,8 +750,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
        if (!cma_xprt)
                return ERR_PTR(-ENOMEM);
 
-       listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
-                                  IB_QPT_RC);
+       listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
+                                  RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(listen_id)) {
                ret = PTR_ERR(listen_id);
                dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -744,24 +790,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
 {
        struct ib_mr *mr;
-       struct ib_fast_reg_page_list *pl;
+       struct scatterlist *sg;
        struct svc_rdma_fastreg_mr *frmr;
+       u32 num_sg;
 
        frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
        if (!frmr)
                goto err;
 
-       mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
+       num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len);
+       mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg);
        if (IS_ERR(mr))
                goto err_free_frmr;
 
-       pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
-                                        RPCSVC_MAXPAGES);
-       if (IS_ERR(pl))
+       sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
+       if (!sg)
                goto err_free_mr;
 
+       sg_init_table(sg, RPCSVC_MAXPAGES);
+
        frmr->mr = mr;
-       frmr->page_list = pl;
+       frmr->sg = sg;
        INIT_LIST_HEAD(&frmr->frmr_list);
        return frmr;
 
@@ -781,8 +830,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
                frmr = list_entry(xprt->sc_frmr_q.next,
                                  struct svc_rdma_fastreg_mr, frmr_list);
                list_del_init(&frmr->frmr_list);
+               kfree(frmr->sg);
                ib_dereg_mr(frmr->mr);
-               ib_free_fast_reg_page_list(frmr->page_list);
                kfree(frmr);
        }
 }
@@ -796,8 +845,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
                frmr = list_entry(rdma->sc_frmr_q.next,
                                  struct svc_rdma_fastreg_mr, frmr_list);
                list_del_init(&frmr->frmr_list);
-               frmr->map_len = 0;
-               frmr->page_list_len = 0;
+               frmr->sg_nents = 0;
        }
        spin_unlock_bh(&rdma->sc_frmr_q_lock);
        if (frmr)
@@ -806,25 +854,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
        return rdma_alloc_frmr(rdma);
 }
 
-static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
-                          struct svc_rdma_fastreg_mr *frmr)
-{
-       int page_no;
-       for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
-               dma_addr_t addr = frmr->page_list->page_list[page_no];
-               if (ib_dma_mapping_error(frmr->mr->device, addr))
-                       continue;
-               atomic_dec(&xprt->sc_dma_used);
-               ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
-                                 frmr->direction);
-       }
-}
-
 void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
                       struct svc_rdma_fastreg_mr *frmr)
 {
        if (frmr) {
-               frmr_unmap_dma(rdma, frmr);
+               ib_dma_unmap_sg(rdma->sc_cm_id->device,
+                               frmr->sg, frmr->sg_nents, frmr->direction);
+               atomic_dec(&rdma->sc_dma_used);
                spin_lock_bh(&rdma->sc_frmr_q_lock);
                WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
                list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
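
Fast-registration bookkeeping moves from the removed ib_fast_reg_page_list (an
array of DMA addresses that the deleted frmr_unmap_dma() had to unmap one page
at a time) to a standard scatterlist: the MR is now allocated with
ib_alloc_mr(IB_MR_TYPE_MEM_REG, ...) and torn down with a single
ib_dma_unmap_sg(). The map-side counterpart lives in svc_rdma_recvfrom.c; a
hedged sketch of its shape, modeled on that read-chunk path (my_map_frmr is
illustrative, not part of the patch):

    #include <linux/scatterlist.h>
    #include <linux/sunrpc/svc_rdma.h>

    static int my_map_frmr(struct svcxprt_rdma *xprt,
                           struct svc_rdma_fastreg_mr *frmr,
                           struct page **pages, int npages)
    {
            int i, mapped;

            /* Populate the sg table, then DMA-map it in one call;
             * sg_nents records how many entries the eventual
             * ib_dma_unmap_sg() must undo. */
            for (i = 0; i < npages; i++)
                    sg_set_page(&frmr->sg[i], pages[i], PAGE_SIZE, 0);
            frmr->sg_nents = npages;
            frmr->direction = DMA_FROM_DEVICE;

            mapped = ib_dma_map_sg(xprt->sc_cm_id->device,
                                   frmr->sg, frmr->sg_nents, frmr->direction);
            if (!mapped)
                    return -ENOMEM;
            atomic_inc(&xprt->sc_dma_used);
            return 0;
    }
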
@@ -848,10 +884,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        struct svcxprt_rdma *listen_rdma;
        struct svcxprt_rdma *newxprt = NULL;
        struct rdma_conn_param conn_param;
+       struct ib_cq_init_attr cq_attr = {};
        struct ib_qp_init_attr qp_attr;
        struct ib_device_attr devattr;
        int uninitialized_var(dma_mr_acc);
-       int need_dma_mr;
+       int need_dma_mr = 0;
        int ret;
        int i;
 
@@ -884,6 +921,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
         * capabilities of this particular device */
        newxprt->sc_max_sge = min((size_t)devattr.max_sge,
                                  (size_t)RPCSVC_MAXPAGES);
+       newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+                                      RPCSVC_MAXPAGES);
        newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
                                   (size_t)svcrdma_max_requests);
        newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
@@ -900,22 +939,22 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                dprintk("svcrdma: error creating PD for connect request\n");
                goto errout;
        }
+       cq_attr.cqe = newxprt->sc_sq_depth;
        newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
                                         sq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
-                                        newxprt->sc_sq_depth,
-                                        0);
+                                        &cq_attr);
        if (IS_ERR(newxprt->sc_sq_cq)) {
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
+       cq_attr.cqe = newxprt->sc_max_requests;
        newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
                                         rq_comp_handler,
                                         cq_event_handler,
                                         newxprt,
-                                        newxprt->sc_max_requests,
-                                        0);
+                                        &cq_attr);
        if (IS_ERR(newxprt->sc_rq_cq)) {
                dprintk("svcrdma: error creating RQ CQ for connect request\n");
                goto errout;
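
Since 4.2, ib_create_cq() takes a struct ib_cq_init_attr instead of separate
cqe and comp_vector arguments, so new CQ attributes can be added without
another signature change; note that cq_attr is reused here, re-set to
sc_max_requests before the RQ CQ is created. A sketch of the convention
(my_create_cq is illustrative):

    #include <rdma/ib_verbs.h>

    static struct ib_cq *my_create_cq(struct ib_device *dev,
                                      ib_comp_handler comp,
                                      void (*event)(struct ib_event *, void *),
                                      void *ctx, int depth)
    {
            /* Fields left out of the initializer (comp_vector, flags)
             * stay zero, matching the old trailing "0" argument. */
            struct ib_cq_init_attr cq_attr = { .cqe = depth };

            return ib_create_cq(dev, comp, event, ctx, &cq_attr);
    }
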
@@ -985,35 +1024,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        /*
         * Determine if a DMA MR is required and if so, what privs are required
         */
-       switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
-       case RDMA_TRANSPORT_IWARP:
-               newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
-               if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
-                       need_dma_mr = 1;
-                       dma_mr_acc =
-                               (IB_ACCESS_LOCAL_WRITE |
-                                IB_ACCESS_REMOTE_WRITE);
-               } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
-                       need_dma_mr = 1;
-                       dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
-               } else
-                       need_dma_mr = 0;
-               break;
-       case RDMA_TRANSPORT_IB:
-               if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
-                       need_dma_mr = 1;
-                       dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
-               } else if (!(devattr.device_cap_flags &
-                            IB_DEVICE_LOCAL_DMA_LKEY)) {
-                       need_dma_mr = 1;
-                       dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
-               } else
-                       need_dma_mr = 0;
-               break;
-       default:
+       if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
+                                newxprt->sc_cm_id->port_num) &&
+           !rdma_ib_or_roce(newxprt->sc_cm_id->device,
+                            newxprt->sc_cm_id->port_num))
                goto errout;
+
+       if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
+           !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+               need_dma_mr = 1;
+               dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+               if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
+                                       newxprt->sc_cm_id->port_num) &&
+                   !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
+                       dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
        }
 
+       if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
+                               newxprt->sc_cm_id->port_num))
+               newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
+
        /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
        if (need_dma_mr) {
                /* Register all of physical memory */
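
The switch on rdma_node_get_transport() collapses into the per-port capability
helpers rdma_protocol_iwarp() and rdma_ib_or_roce() (added in 4.1). Because no
default: branch guarantees an assignment any more, need_dma_mr is initialized
to 0 at its declaration in the earlier hunk, and the iWARP-specific behavior
is preserved: REMOTE_WRITE access when fast registration is unavailable, plus
the READ_W_INV capability bit. A sketch of the helper usage
(my_port_is_supported is illustrative):

    #include <rdma/ib_verbs.h>
    #include <rdma/rdma_cm.h>

    /* The helpers are per device *and* port, since a multi-port device
     * could in principle speak different protocols on each port. */
    static bool my_port_is_supported(struct rdma_cm_id *id)
    {
            return rdma_protocol_iwarp(id->device, id->port_num) ||
                   rdma_ib_or_roce(id->device, id->port_num);
    }
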
@@ -1067,6 +1097,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                "    remote_ip       : %pI4\n"
                "    remote_port     : %d\n"
                "    max_sge         : %d\n"
+               "    max_sge_rd      : %d\n"
                "    sq_depth        : %d\n"
                "    max_requests    : %d\n"
                "    ord             : %d\n",
@@ -1080,6 +1111,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
                       route.addr.dst_addr)->sin_port),
                newxprt->sc_max_sge,
+               newxprt->sc_max_sge_rd,
                newxprt->sc_sq_depth,
                newxprt->sc_max_requests,
                newxprt->sc_ord);
@@ -1222,40 +1254,6 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp)
        return 1;
 }
 
-/*
- * Attempt to register the kvec representing the RPC memory with the
- * device.
- *
- * Returns:
- *  NULL : The device does not support fastreg or there were no more
- *         fastreg mr.
- *  frmr : The kvec register request was successfully posted.
- *    <0 : An error was encountered attempting to register the kvec.
- */
-int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
-                    struct svc_rdma_fastreg_mr *frmr)
-{
-       struct ib_send_wr fastreg_wr;
-       u8 key;
-
-       /* Bump the key */
-       key = (u8)(frmr->mr->lkey & 0x000000FF);
-       ib_update_fast_reg_key(frmr->mr, ++key);
-
-       /* Prepare FASTREG WR */
-       memset(&fastreg_wr, 0, sizeof fastreg_wr);
-       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-       fastreg_wr.send_flags = IB_SEND_SIGNALED;
-       fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
-       fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
-       fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
-       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       fastreg_wr.wr.fast_reg.length = frmr->map_len;
-       fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
-       fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
-       return svc_rdma_send(xprt, &fastreg_wr);
-}
-
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
        struct ib_send_wr *bad_wr, *n_wr;
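
svc_rdma_fastreg() is deleted along with the IB_WR_FAST_REG_MR opcode, which
4.4 removed from the core verbs in favor of IB_WR_REG_MR: the MR is first
programmed with ib_map_mr_sg(), then a struct ib_reg_wr is posted. The in-tree
replacement sits next to its only caller in svc_rdma_recvfrom.c; a hedged
sketch of its shape (my_post_reg_mr is illustrative):

    #include <linux/string.h>
    #include <linux/sunrpc/svc_rdma.h>
    #include <rdma/ib_verbs.h>

    /* Assumes frmr->sg was DMA-mapped and the MR programmed with
     * ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE). */
    static int my_post_reg_mr(struct svcxprt_rdma *xprt,
                              struct svc_rdma_fastreg_mr *frmr)
    {
            struct ib_reg_wr reg_wr;

            memset(&reg_wr, 0, sizeof(reg_wr));
            reg_wr.wr.opcode = IB_WR_REG_MR;
            reg_wr.wr.send_flags = IB_SEND_SIGNALED;
            reg_wr.mr = frmr->mr;
            reg_wr.key = frmr->mr->lkey;
            reg_wr.access = frmr->access_flags;

            return svc_rdma_send(xprt, &reg_wr.wr);
    }
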
@@ -1319,11 +1317,11 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
        struct ib_send_wr err_wr;
        struct page *p;
        struct svc_rdma_op_ctxt *ctxt;
-       u32 *va;
+       __be32 *va;
        int length;
        int ret;
 
-       p = svc_rdma_get_page();
+       p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
        va = page_address(p);
 
        /* XDR encode error */
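
The va pointer changes from u32 * to __be32 * because XDR quantities are
big-endian on the wire; the sparse annotation lets endianness mistakes be
caught at build time. A minimal illustration (my_encode_word is hypothetical):

    #include <asm/byteorder.h>
    #include <linux/types.h>

    /* sparse flags any direct store of a host-order value into a
     * __be32 slot; conversions must go through cpu_to_be32(). */
    static void my_encode_word(__be32 *va, u32 host_value)
    {
            *va = cpu_to_be32(host_value);
    }
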