Upgrade to 4.4.50-rt62

[kvmfornfv.git] / kernel / drivers / infiniband / ulp / ipoib / ipoib_cm.c
diff --git a/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c

index cf32a77..3ba7de5 100644 (file)
--- a/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -63,6 +63,8 @@ MODULE_PARM_DESC(cm_data_debug_level,
  #define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
  #define IPOIB_CM_RX_UPDATE_MASK (0x3)
  
+#define IPOIB_CM_RX_RESERVE     (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)
+
  static struct ib_qp_attr ipoib_cm_err_attr = {
         .qp_state = IB_QPS_ERR
  };
@@ -147,15 +149,15 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
         struct sk_buff *skb;
         int i;
  
-       skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
+       skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
         if (unlikely(!skb))
                 return NULL;
  
         /*
-        * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
+        * IPoIB adds an IPOIB_ENCAP_LEN byte header; this will align the
          * IP header to a multiple of 16.
          */
-       skb_reserve(skb, 12);
+       skb_reserve(skb, IPOIB_CM_RX_RESERVE);
  
         mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
                                        DMA_FROM_DEVICE);
@@ -332,7 +334,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
         int i;
  
         for (i = 0; i < priv->cm.num_frags; ++i)
-               sge[i].lkey = priv->mr->lkey;
+               sge[i].lkey = priv->pd->local_dma_lkey;
  
         sge[0].length = IPOIB_CM_HEAD_SIZE;
         for (i = 1; i < priv->cm.num_frags; ++i)
@@ -624,9 +626,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
         if (wc->byte_len < IPOIB_CM_COPYBREAK) {
                 int dlen = wc->byte_len;
  
-               small_skb = dev_alloc_skb(dlen + 12);
+               small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
                 if (small_skb) {
-                       skb_reserve(small_skb, 12);
+                       skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
                         ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
                                                    dlen, DMA_FROM_DEVICE);
                         skb_copy_from_linear_data(skb, small_skb->data, dlen);
@@ -663,8 +665,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
  
  copied:
         skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-       skb_reset_mac_header(skb);
-       skb_pull(skb, IPOIB_ENCAP_LEN);
+       skb_add_pseudo_hdr(skb);
  
         ++dev->stats.rx_packets;
         dev->stats.rx_bytes += skb->len;
@@ -694,24 +695,21 @@ repost:
  static inline int post_send(struct ipoib_dev_priv *priv,
                             struct ipoib_cm_tx *tx,
                             unsigned int wr_id,
-                           u64 addr, int len)
+                           struct ipoib_tx_buf *tx_req)
  {
         struct ib_send_wr *bad_wr;
  
-       priv->tx_sge[0].addr          = addr;
-       priv->tx_sge[0].length        = len;
+       ipoib_build_sge(priv, tx_req);
  
-       priv->tx_wr.num_sge     = 1;
-       priv->tx_wr.wr_id       = wr_id | IPOIB_OP_CM;
+       priv->tx_wr.wr.wr_id    = wr_id | IPOIB_OP_CM;
  
-       return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+       return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
  }
  
  void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ipoib_cm_tx_buf *tx_req;
-       u64 addr;
+       struct ipoib_tx_buf *tx_req;
         int rc;
  
         if (unlikely(skb->len > tx->mtu)) {
@@ -735,24 +733,21 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
          */
         tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
         tx_req->skb = skb;
-       addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
-       if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+
+       if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
                 ++dev->stats.tx_errors;
                 dev_kfree_skb_any(skb);
                 return;
         }
  
-       tx_req->mapping = addr;
-
         skb_orphan(skb);
         skb_dst_drop(skb);
  
-       rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
-                      addr, skb->len);
+       rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
         if (unlikely(rc)) {
                 ipoib_warn(priv, "post_send failed, error %d\n", rc);
                 ++dev->stats.tx_errors;
-               ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+               ipoib_dma_unmap_tx(priv, tx_req);
                 dev_kfree_skb_any(skb);
         } else {
                 dev->trans_start = jiffies;
@@ -777,7 +772,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_cm_tx *tx = wc->qp->qp_context;
         unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
-       struct ipoib_cm_tx_buf *tx_req;
+       struct ipoib_tx_buf *tx_req;
         unsigned long flags;
  
         ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
@@ -791,7 +786,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
  
         tx_req = &tx->tx_ring[wr_id];
  
-       ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+       ipoib_dma_unmap_tx(priv, tx_req);
  
         /* FIXME: is this right? Shouldn't we only increment on success? */
         ++dev->stats.tx_packets;
@@ -854,7 +849,7 @@ int ipoib_cm_dev_open(struct net_device *dev)
         }
  
         ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
-                          0, NULL);
+                          0);
         if (ret) {
                 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
                        IPOIB_CM_IETF_ID | priv->qp->qp_num);
@@ -1036,10 +1031,11 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
  
         struct ib_qp *tx_qp;
  
+       if (dev->features & NETIF_F_SG)
+               attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+
         tx_qp = ib_create_qp(priv->pd, &attr);
         if (PTR_ERR(tx_qp) == -EINVAL) {
-               ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
-                          priv->ca->name);
                 attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
                 tx_qp = ib_create_qp(priv->pd, &attr);
         }
@@ -1170,7 +1166,7 @@ err_tx:
  static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
  {
         struct ipoib_dev_priv *priv = netdev_priv(p->dev);
-       struct ipoib_cm_tx_buf *tx_req;
+       struct ipoib_tx_buf *tx_req;
         unsigned long begin;
  
         ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
@@ -1197,8 +1193,7 @@ timeout:
  
         while ((int) p->tx_tail - (int) p->tx_head < 0) {
                 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-               ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
-                                   DMA_TO_DEVICE);
+               ipoib_dma_unmap_tx(priv, tx_req);
                 dev_kfree_skb_any(tx_req->skb);
                 ++p->tx_tail;
                 netif_tx_lock_bh(p->dev);
@@ -1303,6 +1298,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
         }
  }
  
+#define QPN_AND_OPTIONS_OFFSET 4
+
  static void ipoib_cm_tx_start(struct work_struct *work)
  {
         struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
@@ -1311,6 +1308,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
         struct ipoib_neigh *neigh;
         struct ipoib_cm_tx *p;
         unsigned long flags;
+       struct ipoib_path *path;
         int ret;
  
         struct ib_sa_path_rec pathrec;
@@ -1323,7 +1321,19 @@ static void ipoib_cm_tx_start(struct work_struct *work)
                 p = list_entry(priv->cm.start_list.next, typeof(*p), list);
                 list_del_init(&p->list);
                 neigh = p->neigh;
+
                 qpn = IPOIB_QPN(neigh->daddr);
+               /*
+                * As long as the search is done while holding these 2 locks,
+                * the existence of a path indicates its validity.
+                */
+               path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+               if (!path) {
+                       pr_info("%s ignore not valid path %pI6\n",
+                               __func__,
+                               neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+                       goto free_neigh;
+               }
                 memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
  
                 spin_unlock_irqrestore(&priv->lock, flags);
@@ -1335,6 +1345,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
                 spin_lock_irqsave(&priv->lock, flags);
  
                 if (ret) {
+free_neigh:
                         neigh = p->neigh;
                         if (neigh) {
                                 neigh->cm = NULL;
@@ -1455,7 +1466,6 @@ static void ipoib_cm_stale_task(struct work_struct *work)
         spin_unlock_irq(&priv->lock);
  }
  
-
  static ssize_t show_mode(struct device *d, struct device_attribute *attr,
                          char *buf)
  {