These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / drivers / misc / mic / scif / scif_nodeqp.c
diff --git a/kernel/drivers/misc/mic/scif/scif_nodeqp.c b/kernel/drivers/misc/mic/scif/scif_nodeqp.c
new file mode 100644 (file)
index 0000000..c66ca1a
--- /dev/null
@@ -0,0 +1,1354 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_nodeqp.h"
+#include "scif_map.h"
+
+/*
+ ************************************************************************
+ * SCIF node Queue Pair (QP) setup flow:
+ *
+ * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus
+ * 2) scif_setup_qp(..) allocates the local qp and calls
+ *     scif_setup_qp_connect(..) which allocates and maps the local
+ *     buffer for the inbound QP
+ * 3) The local node updates the device page with the DMA address of the QP
+ * 4) A delayed work is scheduled (qp_dwork) which periodically reads if
+ *     the peer node has updated its QP DMA address
+ * 5) Once a valid non zero address is found in the QP DMA address field
+ *     in the device page, the local node maps the remote node's QP,
+ *     updates its outbound QP and sends a SCIF_INIT message to the peer
+ * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom
+ *     half handler by calling scif_init(..)
+ * 7) scif_init(..) registers a new SCIF peer node by calling
+ *     scif_peer_register_device(..) which signifies the addition of a new
+ *     SCIF node
+ * 8) On the mgmt node, P2P network setup/teardown is initiated if all the
+ *     remote nodes are online via scif_p2p_setup(..)
+ * 9) For P2P setup, the host maps the remote nodes' aperture and memory
+ *     bars and sends a SCIF_NODE_ADD message to both nodes
+ * 10) As part of scif_nodeadd, both nodes set up their local inbound
+ *     QPs and send a SCIF_NODE_ADD_ACK to the mgmt node
+ * 11) As part of scif_node_add_ack(..) the mgmt node forwards the
+ *     SCIF_NODE_ADD_ACK to the remote nodes
+ * 12) As part of scif_node_add_ack(..) the remote nodes update their
+ *     outbound QPs, make sure they can access memory on the remote node
+ *     and then add a new SCIF peer node by calling
+ *     scif_peer_register_device(..) which signifies the addition of a new
+ *     SCIF node.
+ * 13) The SCIF network is now established across all nodes.
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by non mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) A non mgmt node now cleans up all local data structures and sends a
+ *     SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK
+ * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called
+ * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the
+ *     peers and waits for a SCIF_NODE_REMOVE_ACK
+ * 6) As part of scif_node_remove(..) a remote node unregisters the peer
+ *     node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *     it sends itself a node remove message whose handling cleans up local
+ *     data structures and unregisters the peer node from the SCIF network
+ * 8) The mgmt node sends a SCIF_EXIT_ACK
+ * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown
+ *     completes the SCIF remove routine
+ * 10) The SCIF network is now torn down for the node initiating the
+ *     teardown sequence
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) The mgmt node calls scif_disconnect_node(..)
+ * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers
+ *     and waits for a SCIF_NODE_REMOVE_ACK
+ * 5) As part of scif_node_remove(..) a remote node unregisters the peer
+ *     node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *     it unregisters the peer node from the SCIF network
+ * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK.
+ * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..)
+ *     which would clean up local data structures for all SCIF nodes and
+ *     then send a SCIF_EXIT_ACK back to the mgmt node
+ * 9) Upon receipt of the SCIF_EXIT_ACK the the mgmt node sends itself a node
+ *     remove message whose handling cleans up local data structures and
+ *     destroys any P2P mappings.
+ * 10) The SCIF hardware device for which a remove callback was received is now
+ *     disconnected from the SCIF network.
+ */
+/*
+ * Initializes "local" data structures for the QP. Allocates the QP
+ * ring buffer (rb) and initializes the "in bound" queue.
+ */
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+                         int local_size, struct scif_dev *scifdev)
+{
+       void *local_q = qp->inbound_q.rb_base;
+       int err = 0;
+       u32 tmp_rd = 0;
+
+       spin_lock_init(&qp->send_lock);
+       spin_lock_init(&qp->recv_lock);
+
+       /* Allocate rb only if not already allocated */
+       if (!local_q) {
+               local_q = kzalloc(local_size, GFP_KERNEL);
+               if (!local_q) {
+                       err = -ENOMEM;
+                       return err;
+               }
+       }
+
+       err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+       if (err)
+               goto kfree;
+       /*
+        * To setup the inbound_q, the buffer lives locally, the read pointer
+        * is remote and the write pointer is local.
+        */
+       scif_rb_init(&qp->inbound_q,
+                    &tmp_rd,
+                    &qp->local_write,
+                    local_q, get_count_order(local_size));
+       /*
+        * The read pointer is NULL initially and it is unsafe to use the ring
+        * buffer til this changes!
+        */
+       qp->inbound_q.read_ptr = NULL;
+       err = scif_map_single(qp_offset, qp,
+                             scifdev, sizeof(struct scif_qp));
+       if (err)
+               goto unmap;
+       qp->local_qp = *qp_offset;
+       return err;
+unmap:
+       scif_unmap_single(qp->local_buf, scifdev, local_size);
+       qp->local_buf = 0;
+kfree:
+       kfree(local_q);
+       return err;
+}
+
+/* When the other side has already done it's allocation, this is called */
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+                        dma_addr_t phys, int local_size,
+                        struct scif_dev *scifdev)
+{
+       void *local_q;
+       void *remote_q;
+       struct scif_qp *remote_qp;
+       int remote_size;
+       int err = 0;
+
+       spin_lock_init(&qp->send_lock);
+       spin_lock_init(&qp->recv_lock);
+       /* Start by figuring out where we need to point */
+       remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev);
+       if (!remote_qp)
+               return -EIO;
+       qp->remote_qp = remote_qp;
+       if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+               err = -EIO;
+               goto iounmap;
+       }
+       qp->remote_buf = remote_qp->local_buf;
+       remote_size = qp->remote_qp->inbound_q.size;
+       remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);
+       if (!remote_q) {
+               err = -EIO;
+               goto iounmap;
+       }
+       qp->remote_qp->local_write = 0;
+       /*
+        * To setup the outbound_q, the buffer lives in remote memory,
+        * the read pointer is local, the write pointer is remote
+        */
+       scif_rb_init(&qp->outbound_q,
+                    &qp->local_read,
+                    &qp->remote_qp->local_write,
+                    remote_q,
+                    get_count_order(remote_size));
+       local_q = kzalloc(local_size, GFP_KERNEL);
+       if (!local_q) {
+               err = -ENOMEM;
+               goto iounmap_1;
+       }
+       err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+       if (err)
+               goto kfree;
+       qp->remote_qp->local_read = 0;
+       /*
+        * To setup the inbound_q, the buffer lives locally, the read pointer
+        * is remote and the write pointer is local
+        */
+       scif_rb_init(&qp->inbound_q,
+                    &qp->remote_qp->local_read,
+                    &qp->local_write,
+                    local_q, get_count_order(local_size));
+       err = scif_map_single(qp_offset, qp, scifdev,
+                             sizeof(struct scif_qp));
+       if (err)
+               goto unmap;
+       qp->local_qp = *qp_offset;
+       return err;
+unmap:
+       scif_unmap_single(qp->local_buf, scifdev, local_size);
+       qp->local_buf = 0;
+kfree:
+       kfree(local_q);
+iounmap_1:
+       scif_iounmap(remote_q, remote_size, scifdev);
+       qp->outbound_q.rb_base = NULL;
+iounmap:
+       scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+       qp->remote_qp = NULL;
+       return err;
+}
+
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+                                  struct scif_qp *qp, u64 payload)
+{
+       int err = 0;
+       void *r_buf;
+       int remote_size;
+       phys_addr_t tmp_phys;
+
+       qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev);
+
+       if (!qp->remote_qp) {
+               err = -ENOMEM;
+               goto error;
+       }
+
+       if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+               dev_err(&scifdev->sdev->dev,
+                       "SCIFEP_MAGIC mismatch between self %d remote %d\n",
+                       scif_dev[scif_info.nodeid].node, scifdev->node);
+               err = -ENODEV;
+               goto error;
+       }
+
+       tmp_phys = qp->remote_qp->local_buf;
+       remote_size = qp->remote_qp->inbound_q.size;
+       r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);
+
+       if (!r_buf)
+               return -EIO;
+
+       qp->local_read = 0;
+       scif_rb_init(&qp->outbound_q,
+                    &qp->local_read,
+                    &qp->remote_qp->local_write,
+                    r_buf,
+                    get_count_order(remote_size));
+       /*
+        * Because the node QP may already be processing an INIT message, set
+        * the read pointer so the cached read offset isn't lost
+        */
+       qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
+       /*
+        * resetup the inbound_q now that we know where the
+        * inbound_read really is.
+        */
+       scif_rb_init(&qp->inbound_q,
+                    &qp->remote_qp->local_read,
+                    &qp->local_write,
+                    qp->inbound_q.rb_base,
+                    get_count_order(qp->inbound_q.size));
+error:
+       return err;
+}
+
+static __always_inline void
+scif_send_msg_intr(struct scif_dev *scifdev)
+{
+       struct scif_hw_dev *sdev = scifdev->sdev;
+
+       if (scifdev_is_p2p(scifdev))
+               sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio);
+       else
+               sdev->hw_ops->send_intr(sdev, scifdev->rdb);
+}
+
+int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev)
+{
+       int err = 0;
+       struct scifmsg msg;
+
+       err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys);
+       if (!err) {
+               /*
+                * Now that everything is setup and mapped, we're ready
+                * to tell the peer about our queue's location
+                */
+               msg.uop = SCIF_INIT;
+               msg.dst.node = scifdev->node;
+               err = scif_nodeqp_send(scifdev, &msg);
+       }
+       return err;
+}
+
+void scif_send_exit(struct scif_dev *scifdev)
+{
+       struct scifmsg msg;
+       int ret;
+
+       scifdev->exit = OP_IN_PROGRESS;
+       msg.uop = SCIF_EXIT;
+       msg.src.node = scif_info.nodeid;
+       msg.dst.node = scifdev->node;
+       ret = scif_nodeqp_send(scifdev, &msg);
+       if (ret)
+               goto done;
+       /* Wait for a SCIF_EXIT_ACK message */
+       wait_event_timeout(scif_info.exitwq, scifdev->exit == OP_COMPLETED,
+                          SCIF_NODE_ALIVE_TIMEOUT);
+done:
+       scifdev->exit = OP_IDLE;
+}
+
+int scif_setup_qp(struct scif_dev *scifdev)
+{
+       int err = 0;
+       int local_size;
+       struct scif_qp *qp;
+
+       local_size = SCIF_NODE_QP_SIZE;
+
+       qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+       if (!qp) {
+               err = -ENOMEM;
+               return err;
+       }
+       qp->magic = SCIFEP_MAGIC;
+       scifdev->qpairs = qp;
+       err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr,
+                                   local_size, scifdev);
+       if (err)
+               goto free_qp;
+       /*
+        * We're as setup as we can be. The inbound_q is setup, w/o a usable
+        * outbound q.  When we get a message, the read_ptr will be updated,
+        * and we will pull the message.
+        */
+       return err;
+free_qp:
+       kfree(scifdev->qpairs);
+       scifdev->qpairs = NULL;
+       return err;
+}
+
+static void scif_p2p_freesg(struct scatterlist *sg)
+{
+       kfree(sg);
+}
+
+static struct scatterlist *
+scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt)
+{
+       struct scatterlist *sg;
+       struct page *page;
+       int i;
+
+       sg = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL);
+       if (!sg)
+               return NULL;
+       sg_init_table(sg, page_cnt);
+       for (i = 0; i < page_cnt; i++) {
+               page = pfn_to_page(pa >> PAGE_SHIFT);
+               sg_set_page(&sg[i], page, page_size, 0);
+               pa += page_size;
+       }
+       return sg;
+}
+
+/* Init p2p mappings required to access peerdev from scifdev */
+static struct scif_p2p_info *
+scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev)
+{
+       struct scif_p2p_info *p2p;
+       int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks;
+       struct scif_hw_dev *psdev = peerdev->sdev;
+       struct scif_hw_dev *sdev = scifdev->sdev;
+
+       num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT;
+       num_aper_pages = psdev->aper->len >> PAGE_SHIFT;
+
+       p2p = kzalloc(sizeof(*p2p), GFP_KERNEL);
+       if (!p2p)
+               return NULL;
+       p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa,
+                                                   PAGE_SIZE, num_mmio_pages);
+       if (!p2p->ppi_sg[SCIF_PPI_MMIO])
+               goto free_p2p;
+       p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages;
+       sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30)));
+       num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);
+       p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa,
+                                                   1 << sg_page_shift,
+                                                   num_aper_chunks);
+       p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks;
+       err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+                        num_mmio_pages, PCI_DMA_BIDIRECTIONAL);
+       if (err != num_mmio_pages)
+               goto scif_p2p_free;
+       err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+                        num_aper_chunks, PCI_DMA_BIDIRECTIONAL);
+       if (err != num_aper_chunks)
+               goto dma_unmap;
+       p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]);
+       p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]);
+       p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages;
+       p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages;
+       p2p->ppi_peer_id = peerdev->node;
+       return p2p;
+dma_unmap:
+       dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+                    p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+scif_p2p_free:
+       scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+       scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+free_p2p:
+       kfree(p2p);
+       return NULL;
+}
+
+/* Uninitialize and release resources from a p2p mapping */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+                                struct scif_p2p_info *p2p)
+{
+       struct scif_hw_dev *sdev = scifdev->sdev;
+
+       dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+                    p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+       dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+                    p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+       scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+       scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+       kfree(p2p);
+}
+
+/**
+ * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
+ * @dst: Destination node
+ *
+ * Connect the src and dst node by setting up the p2p connection
+ * between them. Management node here acts like a proxy.
+ */
+static void scif_node_connect(struct scif_dev *scifdev, int dst)
+{
+       struct scif_dev *dev_j = scifdev;
+       struct scif_dev *dev_i = NULL;
+       struct scif_p2p_info *p2p_ij = NULL;    /* bus addr for j from i */
+       struct scif_p2p_info *p2p_ji = NULL;    /* bus addr for i from j */
+       struct scif_p2p_info *p2p;
+       struct list_head *pos, *tmp;
+       struct scifmsg msg;
+       int err;
+       u64 tmppayload;
+
+       if (dst < 1 || dst > scif_info.maxid)
+               return;
+
+       dev_i = &scif_dev[dst];
+
+       if (!_scifdev_alive(dev_i))
+               return;
+       /*
+        * If the p2p connection is already setup or in the process of setting
+        * up then just ignore this request. The requested node will get
+        * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK
+        */
+       if (!list_empty(&dev_i->p2p)) {
+               list_for_each_safe(pos, tmp, &dev_i->p2p) {
+                       p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+                       if (p2p->ppi_peer_id == dev_j->node)
+                               return;
+               }
+       }
+       p2p_ij = scif_init_p2p_info(dev_i, dev_j);
+       if (!p2p_ij)
+               return;
+       p2p_ji = scif_init_p2p_info(dev_j, dev_i);
+       if (!p2p_ji) {
+               scif_deinit_p2p_info(dev_i, p2p_ij);
+               return;
+       }
+       list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
+       list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
+
+       /*
+        * Send a SCIF_NODE_ADD to dev_i, pass it its bus address
+        * as seen from dev_j
+        */
+       msg.uop = SCIF_NODE_ADD;
+       msg.src.node = dev_j->node;
+       msg.dst.node = dev_i->node;
+
+       msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER];
+       msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO];
+       msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER];
+       msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+       err = scif_nodeqp_send(dev_i,  &msg);
+       if (err) {
+               dev_err(&scifdev->sdev->dev,
+                       "%s %d error %d\n", __func__, __LINE__, err);
+               return;
+       }
+
+       /* Same as above but to dev_j */
+       msg.uop = SCIF_NODE_ADD;
+       msg.src.node = dev_i->node;
+       msg.dst.node = dev_j->node;
+
+       tmppayload = msg.payload[0];
+       msg.payload[0] = msg.payload[2];
+       msg.payload[2] = tmppayload;
+       msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO];
+       msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+       scif_nodeqp_send(dev_j, &msg);
+}
+
+static void scif_p2p_setup(void)
+{
+       int i, j;
+
+       if (!scif_info.p2p_enable)
+               return;
+
+       for (i = 1; i <= scif_info.maxid; i++)
+               if (!_scifdev_alive(&scif_dev[i]))
+                       return;
+
+       for (i = 1; i <= scif_info.maxid; i++) {
+               for (j = 1; j <= scif_info.maxid; j++) {
+                       struct scif_dev *scifdev = &scif_dev[i];
+
+                       if (i == j)
+                               continue;
+                       scif_node_connect(scifdev, j);
+               }
+       }
+}
+
+static char *message_types[] = {"BAD",
+                               "INIT",
+                               "EXIT",
+                               "SCIF_EXIT_ACK",
+                               "SCIF_NODE_ADD",
+                               "SCIF_NODE_ADD_ACK",
+                               "SCIF_NODE_ADD_NACK",
+                               "REMOVE_NODE",
+                               "REMOVE_NODE_ACK",
+                               "CNCT_REQ",
+                               "CNCT_GNT",
+                               "CNCT_GNTACK",
+                               "CNCT_GNTNACK",
+                               "CNCT_REJ",
+                               "DISCNCT",
+                               "DISCNT_ACK",
+                               "CLIENT_SENT",
+                               "CLIENT_RCVD",
+                               "SCIF_GET_NODE_INFO",
+                               "REGISTER",
+                               "REGISTER_ACK",
+                               "REGISTER_NACK",
+                               "UNREGISTER",
+                               "UNREGISTER_ACK",
+                               "UNREGISTER_NACK",
+                               "ALLOC_REQ",
+                               "ALLOC_GNT",
+                               "ALLOC_REJ",
+                               "FREE_PHYS",
+                               "FREE_VIRT",
+                               "MUNMAP",
+                               "MARK",
+                               "MARK_ACK",
+                               "MARK_NACK",
+                               "WAIT",
+                               "WAIT_ACK",
+                               "WAIT_NACK",
+                               "SIGNAL_LOCAL",
+                               "SIGNAL_REMOTE",
+                               "SIG_ACK",
+                               "SIG_NACK"};
+
+static void
+scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
+                    const char *label)
+{
+       if (!scif_info.en_msg_log)
+               return;
+       if (msg->uop > SCIF_MAX_MSG) {
+               dev_err(&scifdev->sdev->dev,
+                       "%s: unknown msg type %d\n", label, msg->uop);
+               return;
+       }
+       dev_info(&scifdev->sdev->dev,
+                "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
+                label, message_types[msg->uop], msg->src.node, msg->src.port,
+                msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1],
+                msg->payload[2], msg->payload[3]);
+}
+
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       struct scif_qp *qp = scifdev->qpairs;
+       int err = -ENOMEM, loop_cnt = 0;
+
+       scif_display_message(scifdev, msg, "Sent");
+       if (!qp) {
+               err = -EINVAL;
+               goto error;
+       }
+       spin_lock(&qp->send_lock);
+
+       while ((err = scif_rb_write(&qp->outbound_q,
+                                   msg, sizeof(struct scifmsg)))) {
+               mdelay(1);
+#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000)
+               if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) {
+                       err = -ENODEV;
+                       break;
+               }
+       }
+       if (!err)
+               scif_rb_commit(&qp->outbound_q);
+       spin_unlock(&qp->send_lock);
+       if (!err) {
+               if (scifdev_self(scifdev))
+                       /*
+                        * For loopback we need to emulate an interrupt by
+                        * queuing work for the queue handling real node
+                        * Qp interrupts.
+                        */
+                       queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+               else
+                       scif_send_msg_intr(scifdev);
+       }
+error:
+       if (err)
+               dev_dbg(&scifdev->sdev->dev,
+                       "%s %d error %d uop %d\n",
+                        __func__, __LINE__, err, msg->uop);
+       return err;
+}
+
+/**
+ * scif_nodeqp_send - Send a message on the node queue pair
+ * @scifdev: Scif Device.
+ * @msg: The message to be sent.
+ */
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       int err;
+       struct device *spdev = NULL;
+
+       if (msg->uop > SCIF_EXIT_ACK) {
+               /* Dont send messages once the exit flow has begun */
+               if (OP_IDLE != scifdev->exit)
+                       return -ENODEV;
+               spdev = scif_get_peer_dev(scifdev);
+               if (IS_ERR(spdev)) {
+                       err = PTR_ERR(spdev);
+                       return err;
+               }
+       }
+       err = _scif_nodeqp_send(scifdev, msg);
+       if (msg->uop > SCIF_EXIT_ACK)
+               scif_put_peer_dev(spdev);
+       return err;
+}
+
+/*
+ * scif_misc_handler:
+ *
+ * Work queue handler for servicing miscellaneous SCIF tasks.
+ * Examples include:
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ *    created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
+ */
+void scif_misc_handler(struct work_struct *work)
+{
+       scif_rma_handle_remote_fences();
+       scif_rma_destroy_windows();
+       scif_rma_destroy_tcw_invalid();
+       scif_cleanup_zombie_epd();
+}
+
+/**
+ * scif_init() - Respond to SCIF_INIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ */
+static __always_inline void
+scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       /*
+        * Allow the thread waiting for device page updates for the peer QP DMA
+        * address to complete initializing the inbound_q.
+        */
+       flush_delayed_work(&scifdev->qp_dwork);
+
+       scif_peer_register_device(scifdev);
+
+       if (scif_is_mgmt_node()) {
+               mutex_lock(&scif_info.conflock);
+               scif_p2p_setup();
+               mutex_unlock(&scif_info.conflock);
+       }
+}
+
+/**
+ * scif_exit() - Respond to SCIF_EXIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * This function stops the SCIF interface for the node which sent
+ * the SCIF_EXIT message and starts waiting for that node to
+ * resetup the queue pair again.
+ */
+static __always_inline void
+scif_exit(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+       scifdev->exit_ack_pending = true;
+       if (scif_is_mgmt_node())
+               scif_disconnect_node(scifdev->node, false);
+       else
+               scif_stop(scifdev);
+       schedule_delayed_work(&scifdev->qp_dwork,
+                             msecs_to_jiffies(1000));
+}
+
+/**
+ * scif_exitack() - Respond to SCIF_EXIT_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ */
+static __always_inline void
+scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+       scifdev->exit = OP_COMPLETED;
+       wake_up(&scif_info.exitwq);
+}
+
+/**
+ * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * When the mgmt node driver has finished initializing a MIC node queue pair it
+ * marks the node as online. It then looks for all currently online MIC cards
+ * and send a SCIF_NODE_ADD message to identify the ID of the new card for
+ * peer to peer initialization
+ *
+ * The local node allocates its incoming queue and sends its address in the
+ * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects"
+ * this message to the new node
+ */
+static __always_inline void
+scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       struct scif_dev *newdev;
+       dma_addr_t qp_offset;
+       int qp_connect;
+       struct scif_hw_dev *sdev;
+
+       dev_dbg(&scifdev->sdev->dev,
+               "Scifdev %d:%d received NODE_ADD msg for node %d\n",
+               scifdev->node, msg->dst.node, msg->src.node);
+       dev_dbg(&scifdev->sdev->dev,
+               "Remote address for this node's aperture %llx\n",
+               msg->payload[0]);
+       newdev = &scif_dev[msg->src.node];
+       newdev->node = msg->src.node;
+       newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+       sdev = newdev->sdev;
+
+       if (scif_setup_intr_wq(newdev)) {
+               dev_err(&scifdev->sdev->dev,
+                       "failed to setup interrupts for %d\n", msg->src.node);
+               goto interrupt_setup_error;
+       }
+       newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len);
+       if (!newdev->mmio.va) {
+               dev_err(&scifdev->sdev->dev,
+                       "failed to map mmio for %d\n", msg->src.node);
+               goto mmio_map_error;
+       }
+       newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL);
+       if (!newdev->qpairs)
+               goto qp_alloc_error;
+       /*
+        * Set the base address of the remote node's memory since it gets
+        * added to qp_offset
+        */
+       newdev->base_addr = msg->payload[0];
+
+       qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset,
+                                          SCIF_NODE_QP_SIZE, newdev);
+       if (qp_connect) {
+               dev_err(&scifdev->sdev->dev,
+                       "failed to setup qp_connect %d\n", qp_connect);
+               goto qp_connect_error;
+       }
+
+       newdev->db = sdev->hw_ops->next_db(sdev);
+       newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+                                                  "SCIF_INTR", newdev,
+                                                  newdev->db);
+       if (IS_ERR(newdev->cookie))
+               goto qp_connect_error;
+       newdev->qpairs->magic = SCIFEP_MAGIC;
+       newdev->qpairs->qp_state = SCIF_QP_OFFLINE;
+
+       msg->uop = SCIF_NODE_ADD_ACK;
+       msg->dst.node = msg->src.node;
+       msg->src.node = scif_info.nodeid;
+       msg->payload[0] = qp_offset;
+       msg->payload[2] = newdev->db;
+       scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+       return;
+qp_connect_error:
+       kfree(newdev->qpairs);
+       newdev->qpairs = NULL;
+qp_alloc_error:
+       iounmap(newdev->mmio.va);
+       newdev->mmio.va = NULL;
+mmio_map_error:
+interrupt_setup_error:
+       dev_err(&scifdev->sdev->dev,
+               "node add failed for node %d\n", msg->src.node);
+       msg->uop = SCIF_NODE_ADD_NACK;
+       msg->dst.node = msg->src.node;
+       msg->src.node = scif_info.nodeid;
+       scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+}
+
+void scif_poll_qp_state(struct work_struct *work)
+{
+#define SCIF_NODE_QP_RETRY 100
+#define SCIF_NODE_QP_TIMEOUT 100
+       struct scif_dev *peerdev = container_of(work, struct scif_dev,
+                                                       p2p_dwork.work);
+       struct scif_qp *qp = &peerdev->qpairs[0];
+
+       if (qp->qp_state != SCIF_QP_ONLINE ||
+           qp->remote_qp->qp_state != SCIF_QP_ONLINE) {
+               if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) {
+                       dev_err(&peerdev->sdev->dev,
+                               "Warning: QP check timeout with state %d\n",
+                               qp->qp_state);
+                       goto timeout;
+               }
+               schedule_delayed_work(&peerdev->p2p_dwork,
+                                     msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
+               return;
+       }
+       return;
+timeout:
+       dev_err(&peerdev->sdev->dev,
+               "%s %d remote node %d offline,  state = 0x%x\n",
+               __func__, __LINE__, peerdev->node, qp->qp_state);
+       qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+       scif_peer_unregister_device(peerdev);
+       scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * After a MIC node receives the SCIF_NODE_ADD_ACK message it send this
+ * message to the mgmt node to confirm the sequence is finished.
+ *
+ */
+static __always_inline void
+scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       struct scif_dev *peerdev;
+       struct scif_qp *qp;
+       struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+       dev_dbg(&scifdev->sdev->dev,
+               "Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n",
+               scifdev->node, msg->src.node, msg->dst.node);
+       dev_dbg(&scifdev->sdev->dev,
+               "payload %llx %llx %llx %llx\n", msg->payload[0],
+               msg->payload[1], msg->payload[2], msg->payload[3]);
+       if (scif_is_mgmt_node()) {
+               /*
+                * the lock serializes with scif_qp_response_ack. The mgmt node
+                * is forwarding the NODE_ADD_ACK message from src to dst we
+                * need to make sure that the dst has already received a
+                * NODE_ADD for src and setup its end of the qp to dst
+                */
+               mutex_lock(&scif_info.conflock);
+               msg->payload[1] = scif_info.maxid;
+               scif_nodeqp_send(dst_dev, msg);
+               mutex_unlock(&scif_info.conflock);
+               return;
+       }
+       peerdev = &scif_dev[msg->src.node];
+       peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+       peerdev->node = msg->src.node;
+
+       qp = &peerdev->qpairs[0];
+
+       if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0],
+                                           msg->payload[0])))
+               goto local_error;
+       peerdev->rdb = msg->payload[2];
+       qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+
+       scif_peer_register_device(peerdev);
+
+       schedule_delayed_work(&peerdev->p2p_dwork, 0);
+       return;
+local_error:
+       scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * SCIF_NODE_ADD failed, so inform the waiting wq.
+ */
+static __always_inline void
+scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       if (scif_is_mgmt_node()) {
+               struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+               dev_dbg(&scifdev->sdev->dev,
+                       "SCIF_NODE_ADD_NACK received from %d\n", scifdev->node);
+               scif_nodeqp_send(dst_dev, msg);
+       }
+}
+
+/*
+ * scif_node_remove: Handle SCIF_NODE_REMOVE message
+ * @msg: Interrupt message
+ *
+ * Handle node removal.
+ */
+static __always_inline void
+scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       int node = msg->payload[0];
+       struct scif_dev *scdev = &scif_dev[node];
+
+       scdev->node_remove_ack_pending = true;
+       scif_handle_remove_node(node);
+}
+
+/*
+ * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
+ * @msg: Interrupt message
+ *
+ * The peer has acked a SCIF_NODE_REMOVE message.
+ */
+static __always_inline void
+scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       struct scif_dev *sdev = &scif_dev[msg->payload[0]];
+
+       atomic_inc(&sdev->disconn_rescnt);
+       wake_up(&sdev->disconn_wq);
+}
+
+/**
+ * scif_get_node_info: Respond to SCIF_GET_NODE_INFO interrupt message
+ * @msg:        Interrupt message
+ *
+ * Retrieve node info i.e maxid and total from the mgmt node.
+ */
+static __always_inline void
+scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       if (scif_is_mgmt_node()) {
+               swap(msg->dst.node, msg->src.node);
+               mutex_lock(&scif_info.conflock);
+               msg->payload[1] = scif_info.maxid;
+               msg->payload[2] = scif_info.total;
+               mutex_unlock(&scif_info.conflock);
+               scif_nodeqp_send(scifdev, msg);
+       } else {
+               struct completion *node_info =
+                       (struct completion *)msg->payload[3];
+
+               mutex_lock(&scif_info.conflock);
+               scif_info.maxid = msg->payload[1];
+               scif_info.total = msg->payload[2];
+               complete_all(node_info);
+               mutex_unlock(&scif_info.conflock);
+       }
+}
+
+static void
+scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+       /* Bogus Node Qp Message? */
+       dev_err(&scifdev->sdev->dev,
+               "Unknown message 0x%xn scifdev->node 0x%x\n",
+               msg->uop, scifdev->node);
+}
+
+static void (*scif_intr_func[SCIF_MAX_MSG + 1])
+           (struct scif_dev *, struct scifmsg *msg) = {
+       scif_msg_unknown,       /* Error */
+       scif_init,              /* SCIF_INIT */
+       scif_exit,              /* SCIF_EXIT */
+       scif_exit_ack,          /* SCIF_EXIT_ACK */
+       scif_node_add,          /* SCIF_NODE_ADD */
+       scif_node_add_ack,      /* SCIF_NODE_ADD_ACK */
+       scif_node_add_nack,     /* SCIF_NODE_ADD_NACK */
+       scif_node_remove,       /* SCIF_NODE_REMOVE */
+       scif_node_remove_ack,   /* SCIF_NODE_REMOVE_ACK */
+       scif_cnctreq,           /* SCIF_CNCT_REQ */
+       scif_cnctgnt,           /* SCIF_CNCT_GNT */
+       scif_cnctgnt_ack,       /* SCIF_CNCT_GNTACK */
+       scif_cnctgnt_nack,      /* SCIF_CNCT_GNTNACK */
+       scif_cnctrej,           /* SCIF_CNCT_REJ */
+       scif_discnct,           /* SCIF_DISCNCT */
+       scif_discnt_ack,        /* SCIF_DISCNT_ACK */
+       scif_clientsend,        /* SCIF_CLIENT_SENT */
+       scif_clientrcvd,        /* SCIF_CLIENT_RCVD */
+       scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+       scif_recv_reg,          /* SCIF_REGISTER */
+       scif_recv_reg_ack,      /* SCIF_REGISTER_ACK */
+       scif_recv_reg_nack,     /* SCIF_REGISTER_NACK */
+       scif_recv_unreg,        /* SCIF_UNREGISTER */
+       scif_recv_unreg_ack,    /* SCIF_UNREGISTER_ACK */
+       scif_recv_unreg_nack,   /* SCIF_UNREGISTER_NACK */
+       scif_alloc_req,         /* SCIF_ALLOC_REQ */
+       scif_alloc_gnt_rej,     /* SCIF_ALLOC_GNT */
+       scif_alloc_gnt_rej,     /* SCIF_ALLOC_REJ */
+       scif_free_virt,         /* SCIF_FREE_VIRT */
+       scif_recv_munmap,       /* SCIF_MUNMAP */
+       scif_recv_mark,         /* SCIF_MARK */
+       scif_recv_mark_resp,    /* SCIF_MARK_ACK */
+       scif_recv_mark_resp,    /* SCIF_MARK_NACK */
+       scif_recv_wait,         /* SCIF_WAIT */
+       scif_recv_wait_resp,    /* SCIF_WAIT_ACK */
+       scif_recv_wait_resp,    /* SCIF_WAIT_NACK */
+       scif_recv_sig_local,    /* SCIF_SIG_LOCAL */
+       scif_recv_sig_remote,   /* SCIF_SIG_REMOTE */
+       scif_recv_sig_resp,     /* SCIF_SIG_ACK */
+       scif_recv_sig_resp,     /* SCIF_SIG_NACK */
+};
+
+/**
+ * scif_nodeqp_msg_handler() - Common handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer
+ * @msg: The message to be handled.
+ *
+ * This routine calls the appropriate routine to handle a Node Qp
+ * message receipt
+ */
+static int scif_max_msg_id = SCIF_MAX_MSG;
+
+static void
+scif_nodeqp_msg_handler(struct scif_dev *scifdev,
+                       struct scif_qp *qp, struct scifmsg *msg)
+{
+       scif_display_message(scifdev, msg, "Rcvd");
+
+       if (msg->uop > (u32)scif_max_msg_id) {
+               /* Bogus Node Qp Message? */
+               dev_err(&scifdev->sdev->dev,
+                       "Unknown message 0x%xn scifdev->node 0x%x\n",
+                       msg->uop, scifdev->node);
+               return;
+       }
+
+       scif_intr_func[msg->uop](scifdev, msg);
+}
+
+/**
+ * scif_nodeqp_intrhandler() - Interrupt handler for node messages
+ * @scifdev:    Remote device to respond to
+ * @qp:         Remote memory pointer
+ *
+ * This routine is triggered by the interrupt mechanism.  It reads
+ * messages from the node queue RB and calls the Node QP Message handling
+ * routine.
+ */
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+       struct scifmsg msg;
+       int read_size;
+
+       do {
+               read_size = scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg));
+               if (!read_size)
+                       break;
+               scif_nodeqp_msg_handler(scifdev, qp, &msg);
+               /*
+                * The node queue pair is unmapped so skip the read pointer
+                * update after receipt of a SCIF_EXIT_ACK
+                */
+               if (SCIF_EXIT_ACK == msg.uop)
+                       break;
+               scif_rb_update_read_ptr(&qp->inbound_q);
+       } while (1);
+}
+
+/**
+ * scif_loopb_wq_handler - Loopback Workqueue Handler.
+ * @work: loop back work
+ *
+ * This work queue routine is invoked by the loopback work queue handler.
+ * It grabs the recv lock, dequeues any available messages from the head
+ * of the loopback message list, calls the node QP message handler,
+ * waits for it to return, then frees up this message and dequeues more
+ * elements of the list if available.
+ */
+static void scif_loopb_wq_handler(struct work_struct *unused)
+{
+       struct scif_dev *scifdev = scif_info.loopb_dev;
+       struct scif_qp *qp = scifdev->qpairs;
+       struct scif_loopb_msg *msg;
+
+       do {
+               msg = NULL;
+               spin_lock(&qp->recv_lock);
+               if (!list_empty(&scif_info.loopb_recv_q)) {
+                       msg = list_first_entry(&scif_info.loopb_recv_q,
+                                              struct scif_loopb_msg,
+                                              list);
+                       list_del(&msg->list);
+               }
+               spin_unlock(&qp->recv_lock);
+
+               if (msg) {
+                       scif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
+                       kfree(msg);
+               }
+       } while (msg);
+}
+
+/**
+ * scif_loopb_msg_handler() - Workqueue handler for loopback messages.
+ * @scifdev: SCIF device
+ * @qp: Queue pair.
+ *
+ * This work queue routine is triggered when a loopback message is received.
+ *
+ * We need special handling for receiving Node Qp messages on a loopback SCIF
+ * device via two workqueues for receiving messages.
+ *
+ * The reason we need the extra workqueue which is not required with *normal*
+ * non-loopback SCIF devices is the potential classic deadlock described below:
+ *
+ * Thread A tries to send a message on a loopback SCIF device and blocks since
+ * there is no space in the RB while it has the send_lock held or another
+ * lock called lock X for example.
+ *
+ * Thread B: The Loopback Node QP message receive workqueue receives the message
+ * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries
+ * to grab the send lock again or lock X and deadlocks with Thread A. The RB
+ * cannot be drained any further due to this classic deadlock.
+ *
+ * In order to avoid deadlocks as mentioned above we have an extra level of
+ * indirection achieved by having two workqueues.
+ * 1) The first workqueue whose handler is scif_loopb_msg_handler reads
+ * messages from the Node QP RB, adds them to a list and queues work for the
+ * second workqueue.
+ *
+ * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues
+ * messages from the list, handles them, frees up the memory and dequeues
+ * more elements from the list if possible.
+ */
+int
+scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+       int read_size;
+       struct scif_loopb_msg *msg;
+
+       do {
+               msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+               if (!msg)
+                       return -ENOMEM;
+               read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg,
+                                            sizeof(struct scifmsg));
+               if (read_size != sizeof(struct scifmsg)) {
+                       kfree(msg);
+                       scif_rb_update_read_ptr(&qp->inbound_q);
+                       break;
+               }
+               spin_lock(&qp->recv_lock);
+               list_add_tail(&msg->list, &scif_info.loopb_recv_q);
+               spin_unlock(&qp->recv_lock);
+               queue_work(scif_info.loopb_wq, &scif_info.loopb_work);
+               scif_rb_update_read_ptr(&qp->inbound_q);
+       } while (read_size == sizeof(struct scifmsg));
+       return read_size;
+}
+
+/**
+ * scif_setup_loopback_qp - One time setup work for Loopback Node Qp.
+ * @scifdev: SCIF device
+ *
+ * Sets up the required loopback workqueues, queue pairs and ring buffers
+ */
+int scif_setup_loopback_qp(struct scif_dev *scifdev)
+{
+       int err = 0;
+       void *local_q;
+       struct scif_qp *qp;
+
+       err = scif_setup_intr_wq(scifdev);
+       if (err)
+               goto exit;
+       INIT_LIST_HEAD(&scif_info.loopb_recv_q);
+       snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname),
+                "SCIF LOOPB %d", scifdev->node);
+       scif_info.loopb_wq =
+               alloc_ordered_workqueue(scif_info.loopb_wqname, 0);
+       if (!scif_info.loopb_wq) {
+               err = -ENOMEM;
+               goto destroy_intr;
+       }
+       INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler);
+       /* Allocate Self Qpair */
+       scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL);
+       if (!scifdev->qpairs) {
+               err = -ENOMEM;
+               goto destroy_loopb_wq;
+       }
+
+       qp = scifdev->qpairs;
+       qp->magic = SCIFEP_MAGIC;
+       spin_lock_init(&qp->send_lock);
+       spin_lock_init(&qp->recv_lock);
+
+       local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL);
+       if (!local_q) {
+               err = -ENOMEM;
+               goto free_qpairs;
+       }
+       /*
+        * For loopback the inbound_q and outbound_q are essentially the same
+        * since the Node sends a message on the loopback interface to the
+        * outbound_q which is then received on the inbound_q.
+        */
+       scif_rb_init(&qp->outbound_q,
+                    &qp->local_read,
+                    &qp->local_write,
+                    local_q, get_count_order(SCIF_NODE_QP_SIZE));
+
+       scif_rb_init(&qp->inbound_q,
+                    &qp->local_read,
+                    &qp->local_write,
+                    local_q, get_count_order(SCIF_NODE_QP_SIZE));
+       scif_info.nodeid = scifdev->node;
+
+       scif_peer_register_device(scifdev);
+
+       scif_info.loopb_dev = scifdev;
+       return err;
+free_qpairs:
+       kfree(scifdev->qpairs);
+destroy_loopb_wq:
+       destroy_workqueue(scif_info.loopb_wq);
+destroy_intr:
+       scif_destroy_intr_wq(scifdev);
+exit:
+       return err;
+}
+
+/**
+ * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
+ * @scifdev: SCIF device
+ *
+ * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory.
+ */
+int scif_destroy_loopback_qp(struct scif_dev *scifdev)
+{
+       scif_peer_unregister_device(scifdev);
+       destroy_workqueue(scif_info.loopb_wq);
+       scif_destroy_intr_wq(scifdev);
+       kfree(scifdev->qpairs->outbound_q.rb_base);
+       kfree(scifdev->qpairs);
+       scifdev->sdev = NULL;
+       scif_info.loopb_dev = NULL;
+       return 0;
+}
+
+void scif_destroy_p2p(struct scif_dev *scifdev)
+{
+       struct scif_dev *peer_dev;
+       struct scif_p2p_info *p2p;
+       struct list_head *pos, *tmp;
+       int bd;
+
+       mutex_lock(&scif_info.conflock);
+       /* Free P2P mappings in the given node for all its peer nodes */
+       list_for_each_safe(pos, tmp, &scifdev->p2p) {
+               p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+               dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+                            p2p->sg_nentries[SCIF_PPI_MMIO],
+                            DMA_BIDIRECTIONAL);
+               dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+                            p2p->sg_nentries[SCIF_PPI_APER],
+                            DMA_BIDIRECTIONAL);
+               scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+               scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+               list_del(pos);
+               kfree(p2p);
+       }
+
+       /* Free P2P mapping created in the peer nodes for the given node */
+       for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) {
+               peer_dev = &scif_dev[bd];
+               list_for_each_safe(pos, tmp, &peer_dev->p2p) {
+                       p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+                       if (p2p->ppi_peer_id == scifdev->node) {
+                               dma_unmap_sg(&peer_dev->sdev->dev,
+                                            p2p->ppi_sg[SCIF_PPI_MMIO],
+                                            p2p->sg_nentries[SCIF_PPI_MMIO],
+                                            DMA_BIDIRECTIONAL);
+                               dma_unmap_sg(&peer_dev->sdev->dev,
+                                            p2p->ppi_sg[SCIF_PPI_APER],
+                                            p2p->sg_nentries[SCIF_PPI_APER],
+                                            DMA_BIDIRECTIONAL);
+                               scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+                               scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+                               list_del(pos);
+                               kfree(p2p);
+                       }
+               }
+       }
+       mutex_unlock(&scif_info.conflock);
+}