Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / drivers / scsi / cxgbi / libcxgbi.c
diff --git a/kernel/drivers/scsi/cxgbi/libcxgbi.c b/kernel/drivers/scsi/cxgbi/libcxgbi.c
new file mode 100644 (file)
index 0000000..eb58afc
--- /dev/null
@@ -0,0 +1,2931 @@
+/*
+ * libcxgbi.c: Chelsio common library for T3/T4 iSCSI driver.
+ *
+ * Copyright (c) 2010 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie@chelsio.com)
+ * Written by: Rakesh Ranjan (rranjan@chelsio.com)
+ */
+
+#define pr_fmt(fmt)    KBUILD_MODNAME ":%s: " fmt, __func__
+
+#include <linux/skbuff.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <linux/pci.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_host.h>
+#include <linux/if_vlan.h>
+#include <linux/inet.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+
+#include <linux/inetdevice.h>  /* ip_dev_find */
+#include <linux/module.h>
+#include <net/tcp.h>
+
+/*
+ * Debug level of this library; registered further down as a uint module
+ * parameter with mode 0644.
+ * NOTE(review): it is defined ahead of the libcxgbi.h include, presumably
+ * because the log_debug() macro used throughout this file comes from that
+ * header and refers to dbg_level - confirm.
+ */
+static unsigned int dbg_level;
+
+#include "libcxgbi.h"
+
+/* module identification strings */
+#define DRV_MODULE_NAME                "libcxgbi"
+#define DRV_MODULE_DESC                "Chelsio iSCSI driver library"
+#define DRV_MODULE_VERSION     "0.9.0"
+#define DRV_MODULE_RELDATE     "Jun. 2010"
+
+MODULE_AUTHOR("Chelsio Communications, Inc.");
+MODULE_DESCRIPTION(DRV_MODULE_DESC);
+MODULE_VERSION(DRV_MODULE_VERSION);
+MODULE_LICENSE("GPL");
+
+/* expose dbg_level as the "dbg_level" module parameter */
+module_param(dbg_level, uint, 0644);
+MODULE_PARM_DESC(dbg_level, "libiscsi debug level (default=0)");
+
+
+/*
+ * cxgbi device management
+ * maintains a list of the cxgbi devices
+ *
+ * Every registered device is linked on two lists:
+ *  - cdev_list, which is modified and walked with cdev_mutex held;
+ *  - cdev_rcu_list, which is modified with cdev_rcu_lock held and walked
+ *    under rcu_read_lock() by cxgbi_device_find_by_netdev_rcu().
+ */
+static LIST_HEAD(cdev_list);
+static DEFINE_MUTEX(cdev_mutex);
+
+static LIST_HEAD(cdev_rcu_list);
+static DEFINE_SPINLOCK(cdev_rcu_lock);
+
+/*
+ * cxgbi_device_portmap_create - allocate the source port map of a device
+ * @cdev: device owning the port map
+ * @base: source port number that corresponds to slot 0 of the map
+ * @max_conn: number of slots in the map
+ *
+ * Returns 0 on success, or -ENOMEM when the slot array cannot be allocated.
+ */
+int cxgbi_device_portmap_create(struct cxgbi_device *cdev, unsigned int base,
+                               unsigned int max_conn)
+{
+       struct cxgbi_ports_map *map = &cdev->pmap;
+
+       /* one socket pointer per possible connection */
+       map->port_csk = cxgbi_alloc_big_mem(max_conn *
+                                           sizeof(struct cxgbi_sock *),
+                                           GFP_KERNEL);
+       if (map->port_csk) {
+               map->max_connect = max_conn;
+               map->sport_base = base;
+               spin_lock_init(&map->lock);
+               return 0;
+       }
+
+       pr_warn("cdev 0x%p, portmap OOM %u.\n", cdev, max_conn);
+       return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_portmap_create);
+
+/*
+ * cxgbi_device_portmap_cleanup - close every socket still in the port map
+ * @cdev: device whose port map is emptied
+ *
+ * Each occupied slot is cleared; its socket is flagged CTPF_OFFLOAD_DOWN and
+ * closed while holding the socket lock, then one socket reference is dropped.
+ */
+void cxgbi_device_portmap_cleanup(struct cxgbi_device *cdev)
+{
+       struct cxgbi_ports_map *map = &cdev->pmap;
+       int slot;
+
+       for (slot = 0; slot < map->max_connect; slot++) {
+               struct cxgbi_sock *csk = map->port_csk[slot];
+
+               if (!csk)
+                       continue;
+
+               map->port_csk[slot] = NULL;
+               log_debug(1 << CXGBI_DBG_SOCK,
+                       "csk 0x%p, cdev 0x%p, offload down.\n",
+                       csk, cdev);
+
+               spin_lock_bh(&csk->lock);
+               cxgbi_sock_set_flag(csk, CTPF_OFFLOAD_DOWN);
+               cxgbi_sock_closed(csk);
+               spin_unlock_bh(&csk->lock);
+
+               cxgbi_sock_put(csk);
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_portmap_cleanup);
+
+/*
+ * Tear down a device: remove its hosts, close the sockets left in the port
+ * map, clean up DDP, free the port map array and finally free the device.
+ * Called by cxgbi_device_unregister() once the device is off both lists.
+ */
+static inline void cxgbi_device_destroy(struct cxgbi_device *cdev)
+{
+       log_debug(1 << CXGBI_DBG_DEV,
+               "cdev 0x%p, p# %u.\n", cdev, cdev->nports);
+       cxgbi_hbas_remove(cdev);
+       cxgbi_device_portmap_cleanup(cdev);
+       /* use the device-specific DDP cleanup hook when one is set */
+       if (cdev->dev_ddp_cleanup)
+               cdev->dev_ddp_cleanup(cdev);
+       else
+               cxgbi_ddp_cleanup(cdev);
+       /*
+        * NOTE(review): when no hook is set and cdev->ddp is still non-NULL,
+        * cxgbi_ddp_cleanup() runs a second time here - confirm that it
+        * tolerates being called twice.
+        */
+       if (cdev->ddp)
+               cxgbi_ddp_cleanup(cdev);
+       /* max_connect is only set once the port map array was allocated */
+       if (cdev->pmap.max_connect)
+               cxgbi_free_big_mem(cdev->pmap.port_csk);
+       kfree(cdev);
+}
+
+/*
+ * cxgbi_device_register - allocate a cxgbi device and add it to the lists
+ * @extra: number of bytes of driver private data to allocate with the device
+ * @nports: number of ports the device has
+ *
+ * A single zeroed allocation holds, in this order: the cxgbi_device itself,
+ * the ports[] array, the hbas[] array and the @extra private bytes.  The
+ * device is then linked on both cdev_list and cdev_rcu_list.
+ *
+ * Returns the new device, or NULL if the allocation fails.
+ */
+struct cxgbi_device *cxgbi_device_register(unsigned int extra,
+                                          unsigned int nports)
+{
+       struct cxgbi_device *cdev;
+
+       cdev = kzalloc(sizeof(*cdev) + extra + nports *
+                       (sizeof(struct cxgbi_hba *) +
+                        sizeof(struct net_device *)),
+                       GFP_KERNEL);
+       if (!cdev) {
+               /* nports is unsigned, so it must be printed with %u */
+               pr_warn("nport %u, OOM.\n", nports);
+               return NULL;
+       }
+       /* carve the trailing arrays out of the same allocation */
+       cdev->ports = (struct net_device **)(cdev + 1);
+       cdev->hbas = (struct cxgbi_hba **)(((char*)cdev->ports) + nports *
+                                               sizeof(struct net_device *));
+       /* dd_data is left NULL (from kzalloc) when no private data is asked */
+       if (extra)
+               cdev->dd_data = ((char *)cdev->hbas) +
+                               nports * sizeof(struct cxgbi_hba *);
+       spin_lock_init(&cdev->pmap.lock);
+
+       mutex_lock(&cdev_mutex);
+       list_add_tail(&cdev->list_head, &cdev_list);
+       mutex_unlock(&cdev_mutex);
+
+       spin_lock(&cdev_rcu_lock);
+       list_add_tail_rcu(&cdev->rcu_node, &cdev_rcu_list);
+       spin_unlock(&cdev_rcu_lock);
+
+       log_debug(1 << CXGBI_DBG_DEV,
+               "cdev 0x%p, p# %u.\n", cdev, nports);
+       return cdev;
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_register);
+
+/*
+ * cxgbi_device_unregister - unlink a device from both lists and destroy it
+ * @cdev: device to remove
+ *
+ * The device is taken off cdev_list and cdev_rcu_list, synchronize_rcu() is
+ * called so that RCU walkers of cdev_rcu_list are done with it, and then it
+ * is torn down and freed by cxgbi_device_destroy().
+ */
+void cxgbi_device_unregister(struct cxgbi_device *cdev)
+{
+       log_debug(1 << CXGBI_DBG_DEV,
+               "cdev 0x%p, p# %u,%s.\n",
+               cdev, cdev->nports, cdev->nports ? cdev->ports[0]->name : "");
+
+       mutex_lock(&cdev_mutex);
+       list_del(&cdev->list_head);
+       mutex_unlock(&cdev_mutex);
+
+       spin_lock(&cdev_rcu_lock);
+       list_del_rcu(&cdev->rcu_node);
+       spin_unlock(&cdev_rcu_lock);
+       /* wait for readers that may still be looking at this entry */
+       synchronize_rcu();
+
+       cxgbi_device_destroy(cdev);
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_unregister);
+
+/*
+ * cxgbi_device_unregister_all - unregister every device matching @flag
+ * @flag: bits that must all be set in cdev->flags for a device to be removed
+ *
+ * cxgbi_device_unregister() takes cdev_mutex itself, so the mutex has to be
+ * dropped around the call.  Once it is dropped the list may change, so no
+ * cached "next" entry can be trusted; the scan is restarted from the head
+ * after every removal instead.  Each removal takes the device off the list,
+ * so the rescan always makes progress.
+ */
+void cxgbi_device_unregister_all(unsigned int flag)
+{
+       struct cxgbi_device *cdev;
+
+restart:
+       mutex_lock(&cdev_mutex);
+       list_for_each_entry(cdev, &cdev_list, list_head) {
+               if ((cdev->flags & flag) == flag) {
+                       mutex_unlock(&cdev_mutex);
+                       cxgbi_device_unregister(cdev);
+                       goto restart;
+               }
+       }
+       mutex_unlock(&cdev_mutex);
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_unregister_all);
+
+/*
+ * cxgbi_device_find_by_lldev - look up a device by its lldev pointer
+ * @lldev: value compared against cdev->lldev
+ *
+ * Walks cdev_list under cdev_mutex.  Returns the first matching device, or
+ * NULL if there is none.
+ */
+struct cxgbi_device *cxgbi_device_find_by_lldev(void *lldev)
+{
+       struct cxgbi_device *pos;
+       struct cxgbi_device *found = NULL;
+
+       mutex_lock(&cdev_mutex);
+       list_for_each_entry(pos, &cdev_list, list_head) {
+               if (pos->lldev == lldev) {
+                       found = pos;
+                       break;
+               }
+       }
+       mutex_unlock(&cdev_mutex);
+
+       if (!found)
+               log_debug(1 << CXGBI_DBG_DEV,
+                       "lldev 0x%p, NO match found.\n", lldev);
+       return found;
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_find_by_lldev);
+
+/*
+ * cxgbi_device_find_by_netdev - find the device and port owning a net_device
+ * @ndev: net_device to look for; a VLAN device is mapped to its real device
+ * @port: if not NULL, receives the index of the matching port
+ *
+ * Walks cdev_list under cdev_mutex.  On a match the VLAN device (or NULL when
+ * @ndev is not a VLAN device) is stored in the vdev field of the port's hba.
+ * Returns the matching device, or NULL if no port uses @ndev.
+ */
+struct cxgbi_device *cxgbi_device_find_by_netdev(struct net_device *ndev,
+                                                int *port)
+{
+       struct net_device *vlan = NULL;
+       struct cxgbi_device *pos;
+       int idx;
+
+       if (ndev->priv_flags & IFF_802_1Q_VLAN) {
+               vlan = ndev;
+               ndev = vlan_dev_real_dev(ndev);
+               log_debug(1 << CXGBI_DBG_DEV,
+                       "vlan dev %s -> %s.\n", vlan->name, ndev->name);
+       }
+
+       mutex_lock(&cdev_mutex);
+       list_for_each_entry(pos, &cdev_list, list_head) {
+               for (idx = 0; idx < pos->nports; idx++) {
+                       if (pos->ports[idx] != ndev)
+                               continue;
+
+                       pos->hbas[idx]->vdev = vlan;
+                       mutex_unlock(&cdev_mutex);
+                       if (port)
+                               *port = idx;
+                       return pos;
+               }
+       }
+       mutex_unlock(&cdev_mutex);
+
+       log_debug(1 << CXGBI_DBG_DEV,
+               "ndev 0x%p, %s, NO match found.\n", ndev, ndev->name);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_find_by_netdev);
+
+/*
+ * cxgbi_device_find_by_netdev_rcu - RCU variant of the net_device lookup
+ * @ndev: net_device to look for; a VLAN device is mapped to its real device
+ * @port: if not NULL, receives the index of the matching port
+ *
+ * Same matching as cxgbi_device_find_by_netdev(), but walks cdev_rcu_list
+ * under rcu_read_lock() instead of taking cdev_mutex.
+ * Returns the matching device, or NULL if no port uses @ndev.
+ */
+struct cxgbi_device *cxgbi_device_find_by_netdev_rcu(struct net_device *ndev,
+                                                    int *port)
+{
+       struct net_device *vdev = NULL;
+       struct cxgbi_device *cdev;
+       int i;
+
+       /* for a VLAN device, match on the underlying real device */
+       if (ndev->priv_flags & IFF_802_1Q_VLAN) {
+               vdev = ndev;
+               ndev = vlan_dev_real_dev(ndev);
+               pr_info("vlan dev %s -> %s.\n", vdev->name, ndev->name);
+       }
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(cdev, &cdev_rcu_list, rcu_node) {
+               for (i = 0; i < cdev->nports; i++) {
+                       if (ndev == cdev->ports[i]) {
+                               /* remember the VLAN device (NULL if none) */
+                               cdev->hbas[i]->vdev = vdev;
+                               /*
+                                * NOTE(review): cdev is handed back after the
+                                * read-side section ends; the caller presumably
+                                * guarantees it stays registered - confirm.
+                                */
+                               rcu_read_unlock();
+                               if (port)
+                                       *port = i;
+                               return cdev;
+                       }
+               }
+       }
+       rcu_read_unlock();
+
+       log_debug(1 << CXGBI_DBG_DEV,
+                 "ndev 0x%p, %s, NO match found.\n", ndev, ndev->name);
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(cxgbi_device_find_by_netdev_rcu);
+
+#if IS_ENABLED(CONFIG_IPV6)
+/*
+ * Find the device and port whose net_device has the same hardware address
+ * as @ndev (a VLAN device is first mapped to its real device).  All
+ * MAX_ADDR_LEN bytes of dev_addr are compared.  On a match the VLAN device,
+ * or NULL, is stored in the port's hba and *@port (if given) gets the port
+ * index.  Returns the device, or NULL when nothing matches.
+ */
+static struct cxgbi_device *cxgbi_device_find_by_mac(struct net_device *ndev,
+                                                    int *port)
+{
+       struct net_device *vlan = NULL;
+       struct cxgbi_device *pos;
+       int idx;
+
+       if (ndev->priv_flags & IFF_802_1Q_VLAN) {
+               vlan = ndev;
+               ndev = vlan_dev_real_dev(ndev);
+               pr_info("vlan dev %s -> %s.\n", vlan->name, ndev->name);
+       }
+
+       mutex_lock(&cdev_mutex);
+       list_for_each_entry(pos, &cdev_list, list_head) {
+               for (idx = 0; idx < pos->nports; idx++) {
+                       if (memcmp(ndev->dev_addr, pos->ports[idx]->dev_addr,
+                                  MAX_ADDR_LEN))
+                               continue;
+
+                       pos->hbas[idx]->vdev = vlan;
+                       mutex_unlock(&cdev_mutex);
+                       if (port)
+                               *port = idx;
+                       return pos;
+               }
+       }
+       mutex_unlock(&cdev_mutex);
+
+       log_debug(1 << CXGBI_DBG_DEV,
+                 "ndev 0x%p, %s, NO match mac found.\n",
+                 ndev, ndev->name);
+       return NULL;
+}
+#endif
+
+/*
+ * cxgbi_hbas_remove - remove and free every host attached to a device
+ * @cdev: device whose hbas[] entries are released
+ *
+ * For each populated port slot the slot is cleared, the iSCSI host is
+ * removed, the PCI device reference taken for it is dropped and the host
+ * is freed.
+ */
+void cxgbi_hbas_remove(struct cxgbi_device *cdev)
+{
+       struct cxgbi_hba *hba;
+       int port;
+
+       log_debug(1 << CXGBI_DBG_DEV,
+               "cdev 0x%p, p#%u.\n", cdev, cdev->nports);
+
+       for (port = 0; port < cdev->nports; port++) {
+               hba = cdev->hbas[port];
+               if (!hba)
+                       continue;
+
+               cdev->hbas[port] = NULL;
+               iscsi_host_remove(hba->shost);
+               pci_dev_put(cdev->pdev);
+               iscsi_host_free(hba->shost);
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_hbas_remove);
+
+/*
+ * cxgbi_hbas_add - allocate and add one iSCSI host per port of a device
+ * @cdev: device to attach the hosts to
+ * @max_lun: value stored in each host's max_lun
+ * @max_id: value stored in each host's max_id
+ * @sht: SCSI host template passed to iscsi_host_alloc()
+ * @stt: SCSI transport template stored in each host
+ *
+ * Returns 0 when every port got a host.  On failure all hosts already
+ * recorded in cdev->hbas[] are removed again and a negative error code is
+ * returned (-ENOMEM for an allocation failure, otherwise the error from
+ * iscsi_host_add()).
+ */
+int cxgbi_hbas_add(struct cxgbi_device *cdev, u64 max_lun,
+               unsigned int max_id, struct scsi_host_template *sht,
+               struct scsi_transport_template *stt)
+{
+       struct cxgbi_hba *chba;
+       struct Scsi_Host *shost;
+       int i, err;
+
+       log_debug(1 << CXGBI_DBG_DEV, "cdev 0x%p, p#%u.\n", cdev, cdev->nports);
+
+       for (i = 0; i < cdev->nports; i++) {
+               /* the host's private area holds the struct cxgbi_hba */
+               shost = iscsi_host_alloc(sht, sizeof(*chba), 1);
+               if (!shost) {
+                       pr_info("0x%p, p%d, %s, host alloc failed.\n",
+                               cdev, i, cdev->ports[i]->name);
+                       err = -ENOMEM;
+                       goto err_out;
+               }
+
+               shost->transportt = stt;
+               shost->max_lun = max_lun;
+               shost->max_id = max_id;
+               shost->max_channel = 0;
+               shost->max_cmd_len = 16;
+
+               chba = iscsi_host_priv(shost);
+               chba->cdev = cdev;
+               chba->ndev = cdev->ports[i];
+               chba->shost = shost;
+
+               log_debug(1 << CXGBI_DBG_DEV,
+                       "cdev 0x%p, p#%d %s: chba 0x%p.\n",
+                       cdev, i, cdev->ports[i]->name, chba);
+
+               /* one PCI device reference per host; see cxgbi_hbas_remove() */
+               pci_dev_get(cdev->pdev);
+               err = iscsi_host_add(shost, &cdev->pdev->dev);
+               if (err) {
+                       pr_info("cdev 0x%p, p#%d %s, host add failed.\n",
+                               cdev, i, cdev->ports[i]->name);
+                       /*
+                        * This host is not in cdev->hbas[] yet, so it is
+                        * undone here rather than by cxgbi_hbas_remove().
+                        */
+                       pci_dev_put(cdev->pdev);
+                       scsi_host_put(shost);
+                       goto  err_out;
+               }
+
+               cdev->hbas[i] = chba;
+       }
+
+       return 0;
+
+err_out:
+       /* releases the hosts of the ports that were set up successfully */
+       cxgbi_hbas_remove(cdev);
+       return err;
+}
+EXPORT_SYMBOL_GPL(cxgbi_hbas_add);
+
+/*
+ * iSCSI offload
+ *
+ * - source port management
+ *   To find a free source port in the port allocation map we use a very simple
+ *   rotor scheme to look for the next free port.
+ *
+ *   If a source port has been specified make sure that it doesn't collide with
+ *   our normal source port allocation map.  If it's outside the range of our
+ *   allocation/deallocation scheme just let them use it.
+ *
+ *   If the source port is outside our allocation range, the caller is
+ *   responsible for keeping track of their port usage.
+ */
+
+/*
+ * Return the first socket in the device's port map whose port_id equals
+ * @port_id, or NULL if there is none.  The scan runs under the port map lock
+ * and stops early once as many occupied slots as pmap->used have been seen.
+ */
+static struct cxgbi_sock *find_sock_on_port(struct cxgbi_device *cdev,
+                                           unsigned char port_id)
+{
+       struct cxgbi_ports_map *map = &cdev->pmap;
+       struct cxgbi_sock *match = NULL;
+       unsigned int remaining;
+       unsigned int slot;
+
+       if (!(map->max_connect && map->used))
+               return NULL;
+
+       spin_lock_bh(&map->lock);
+       remaining = map->used;
+       for (slot = 0; remaining && slot < map->max_connect; slot++) {
+               struct cxgbi_sock *cur = map->port_csk[slot];
+
+               if (!cur)
+                       continue;
+               if (cur->port_id == port_id) {
+                       match = cur;
+                       break;
+               }
+               remaining--;
+       }
+       spin_unlock_bh(&map->lock);
+
+       return match;
+}
+
+/*
+ * Allocate a source port for @csk from its device's port map.
+ *
+ * The socket's source port must still be zero.  The map is searched for a
+ * free slot starting just after pmap->next and wrapping around; on success
+ * the slot is bound to the socket, the source port is set to
+ * sport_base + slot index and a socket reference is taken.
+ *
+ * Returns 0 on success, -EADDRINUSE if a source port is already set, or
+ * -EADDRNOTAVAIL if there is no port map or no free slot.
+ */
+static int sock_get_port(struct cxgbi_sock *csk)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       struct cxgbi_ports_map *pmap = &cdev->pmap;
+       unsigned int start;
+       int idx;
+       __be16 *port;
+
+       if (!pmap->max_connect) {
+               pr_err("cdev 0x%p, p#%u %s, NO port map.\n",
+                          cdev, csk->port_id, cdev->ports[csk->port_id]->name);
+               return -EADDRNOTAVAIL;
+       }
+
+       /* pick the source port field that matches the address family */
+       if (csk->csk_family == AF_INET)
+               port = &csk->saddr.sin_port;
+       else /* ipv6 */
+               port = &csk->saddr6.sin6_port;
+
+       if (*port) {
+               pr_err("source port NON-ZERO %u.\n",
+                       ntohs(*port));
+               return -EADDRINUSE;
+       }
+
+       spin_lock_bh(&pmap->lock);
+       if (pmap->used >= pmap->max_connect) {
+               spin_unlock_bh(&pmap->lock);
+               pr_info("cdev 0x%p, p#%u %s, ALL ports used.\n",
+                       cdev, csk->port_id, cdev->ports[csk->port_id]->name);
+               return -EADDRNOTAVAIL;
+       }
+
+       /* rotor search: begin after the last allocated slot, wrap at the end */
+       start = idx = pmap->next;
+       do {
+               if (++idx >= pmap->max_connect)
+                       idx = 0;
+               if (!pmap->port_csk[idx]) {
+                       pmap->used++;
+                       *port = htons(pmap->sport_base + idx);
+                       pmap->next = idx;
+                       pmap->port_csk[idx] = csk;
+                       spin_unlock_bh(&pmap->lock);
+                       /* this reference is dropped in sock_put_port() */
+                       cxgbi_sock_get(csk);
+                       log_debug(1 << CXGBI_DBG_SOCK,
+                               "cdev 0x%p, p#%u %s, p %u, %u.\n",
+                               cdev, csk->port_id,
+                               cdev->ports[csk->port_id]->name,
+                               pmap->sport_base + idx, pmap->next);
+                       return 0;
+               }
+       } while (idx != start);
+       spin_unlock_bh(&pmap->lock);
+
+       /* should not happen */
+       pr_warn("cdev 0x%p, p#%u %s, next %u?\n",
+               cdev, csk->port_id, cdev->ports[csk->port_id]->name,
+               pmap->next);
+       return -EADDRNOTAVAIL;
+}
+
+/*
+ * Release the source port held by @csk, if any.
+ *
+ * The socket's source port field is reset to zero.  When the port maps to a
+ * slot of the device's port map, the slot is cleared, the usage count is
+ * decremented and the socket reference taken by sock_get_port() is dropped.
+ * A port outside the map is only reported.
+ */
+static void sock_put_port(struct cxgbi_sock *csk)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       struct cxgbi_ports_map *pmap = &cdev->pmap;
+       __be16 *port;
+
+       /* pick the source port field that matches the address family */
+       if (csk->csk_family == AF_INET)
+               port = &csk->saddr.sin_port;
+       else /* ipv6 */
+               port = &csk->saddr6.sin6_port;
+
+       if (*port) {
+               /* keep the port number: the field is cleared right below */
+               unsigned int sport = ntohs(*port);
+               int idx = sport - pmap->sport_base;
+
+               *port = 0;
+               if (idx < 0 || idx >= pmap->max_connect) {
+                       /* report the real port, not the cleared field */
+                       pr_err("cdev 0x%p, p#%u %s, port %u OOR.\n",
+                               cdev, csk->port_id,
+                               cdev->ports[csk->port_id]->name,
+                               sport);
+                       return;
+               }
+
+               spin_lock_bh(&pmap->lock);
+               pmap->port_csk[idx] = NULL;
+               pmap->used--;
+               spin_unlock_bh(&pmap->lock);
+
+               log_debug(1 << CXGBI_DBG_SOCK,
+                       "cdev 0x%p, p#%u %s, release %u.\n",
+                       cdev, csk->port_id, cdev->ports[csk->port_id]->name,
+                       pmap->sport_base + idx);
+
+               cxgbi_sock_put(csk);
+       }
+}
+
+/*
+ * iscsi tcp connection
+ */
+/*
+ * cxgbi_sock_free_cpl_skbs - free the skbs cached on a socket
+ * @csk: socket whose cpl_close, cpl_abort_req and cpl_abort_rpl are released
+ *
+ * Every pointer that is set is freed with kfree_skb() and reset to NULL.
+ */
+void cxgbi_sock_free_cpl_skbs(struct cxgbi_sock *csk)
+{
+       struct sk_buff **cached[] = {
+               &csk->cpl_close,
+               &csk->cpl_abort_req,
+               &csk->cpl_abort_rpl,
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(cached); i++) {
+               if (!*cached[i])
+                       continue;
+               kfree_skb(*cached[i]);
+               *cached[i] = NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_free_cpl_skbs);
+
+/*
+ * Allocate and initialise a socket for @cdev.
+ *
+ * The socket is zero-allocated with GFP_NOIO, gets its skbs from the
+ * device's csk_alloc_cpls() hook, and starts in state CTP_CLOSED with its
+ * reference count initialised by kref_init().
+ *
+ * Returns the new socket, or NULL if either allocation fails.
+ */
+static struct cxgbi_sock *cxgbi_sock_create(struct cxgbi_device *cdev)
+{
+       struct cxgbi_sock *csk = kzalloc(sizeof(*csk), GFP_NOIO);
+
+       if (!csk) {
+               pr_info("alloc csk %zu failed.\n", sizeof(*csk));
+               return NULL;
+       }
+
+       if (cdev->csk_alloc_cpls(csk) < 0) {
+               pr_info("csk 0x%p, alloc cpls failed.\n", csk);
+               kfree(csk);
+               return NULL;
+       }
+
+       spin_lock_init(&csk->lock);
+       kref_init(&csk->refcnt);
+       skb_queue_head_init(&csk->receive_queue);
+       skb_queue_head_init(&csk->write_queue);
+       /*
+        * The timer is set up without a handler here.
+        * NOTE(review): presumably the handler is assigned before the timer
+        * is armed - confirm against the users of retry_timer.
+        */
+       setup_timer(&csk->retry_timer, NULL, (unsigned long)csk);
+       rwlock_init(&csk->callback_lock);
+       csk->cdev = cdev;
+       csk->flags = 0;
+       cxgbi_sock_set_state(csk, CTP_CLOSED);
+
+       log_debug(1 << CXGBI_DBG_SOCK, "cdev 0x%p, new csk 0x%p.\n", cdev, csk);
+
+       return csk;
+}
+
+/*
+ * Look up the IPv4 TCP output route for the given addresses and ports in
+ * init_net.  @fl4 is filled in by ip_route_output_ports().
+ * Returns the route, or NULL when the lookup reports an error.
+ */
+static struct rtable *find_route_ipv4(struct flowi4 *fl4,
+                                     __be32 saddr, __be32 daddr,
+                                     __be16 sport, __be16 dport, u8 tos)
+{
+       struct rtable *route = ip_route_output_ports(&init_net, fl4, NULL,
+                                                    daddr, saddr,
+                                                    dport, sport,
+                                                    IPPROTO_TCP, tos, 0);
+
+       /* callers only test for NULL, so the error code is not passed on */
+       return IS_ERR(route) ? NULL : route;
+}
+
+/*
+ * cxgbi_check_route - find the IPv4 route to a target and create a socket
+ * @dst_addr: destination address, interpreted as a struct sockaddr_in
+ *
+ * Resolves the route and the neighbour entry for the destination, maps the
+ * outgoing net_device to a registered cxgbi device and port, and allocates a
+ * socket for that device.  On success the socket keeps the route in
+ * csk->dst, and its destination and source addresses are filled in.
+ *
+ * Returns the new socket, or an ERR_PTR(): -ENETUNREACH when there is no
+ * usable route or no cxgbi device behind it, -ENODEV when no neighbour entry
+ * is found, -ENOMEM when the socket cannot be allocated.
+ */
+static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
+{
+       struct sockaddr_in *daddr = (struct sockaddr_in *)dst_addr;
+       struct dst_entry *dst;
+       struct net_device *ndev;
+       struct cxgbi_device *cdev;
+       struct rtable *rt = NULL;
+       struct neighbour *n;
+       struct flowi4 fl4;
+       struct cxgbi_sock *csk = NULL;
+       unsigned int mtu = 0;
+       int port = 0xFFFF;
+       int err = 0;
+
+       rt = find_route_ipv4(&fl4, 0, daddr->sin_addr.s_addr, 0, daddr->sin_port, 0);
+       if (!rt) {
+               pr_info("no route to ipv4 0x%x, port %u.\n",
+                       be32_to_cpu(daddr->sin_addr.s_addr),
+                       be16_to_cpu(daddr->sin_port));
+               err = -ENETUNREACH;
+               goto err_out;
+       }
+       dst = &rt->dst;
+       n = dst_neigh_lookup(dst, &daddr->sin_addr.s_addr);
+       if (!n) {
+               err = -ENODEV;
+               goto rel_rt;
+       }
+       ndev = n->dev;
+
+       if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+               pr_info("multi-cast route %pI4, port %u, dev %s.\n",
+                       &daddr->sin_addr.s_addr, ntohs(daddr->sin_port),
+                       ndev->name);
+               err = -ENETUNREACH;
+               goto rel_neigh;
+       }
+
+       /*
+        * The route goes through loopback: use the interface that owns the
+        * destination address instead.
+        */
+       if (ndev->flags & IFF_LOOPBACK) {
+               ndev = ip_dev_find(&init_net, daddr->sin_addr.s_addr);
+               /* no interface owns the address, so nothing can be offloaded */
+               if (!ndev) {
+                       err = -ENETUNREACH;
+                       goto rel_neigh;
+               }
+               /*
+                * NOTE(review): ip_dev_find() presumably returns the device
+                * with a reference held that is never dropped in this
+                * function - confirm and add the matching dev_put().
+                */
+               mtu = ndev->mtu;
+               pr_info("rt dev %s, loopback -> %s, mtu %u.\n",
+                       n->dev->name, ndev->name, mtu);
+       }
+
+       cdev = cxgbi_device_find_by_netdev(ndev, &port);
+       if (!cdev) {
+               pr_info("dst %pI4, %s, NOT cxgbi device.\n",
+                       &daddr->sin_addr.s_addr, ndev->name);
+               err = -ENETUNREACH;
+               goto rel_neigh;
+       }
+       log_debug(1 << CXGBI_DBG_SOCK,
+               "route to %pI4 :%u, ndev p#%d,%s, cdev 0x%p.\n",
+               &daddr->sin_addr.s_addr, ntohs(daddr->sin_port),
+                          port, ndev->name, cdev);
+
+       csk = cxgbi_sock_create(cdev);
+       if (!csk) {
+               err = -ENOMEM;
+               goto rel_neigh;
+       }
+       csk->cdev = cdev;
+       csk->port_id = port;
+       csk->mtu = mtu;
+       /* the socket takes over the route reference obtained above */
+       csk->dst = dst;
+
+       csk->csk_family = AF_INET;
+       csk->daddr.sin_addr.s_addr = daddr->sin_addr.s_addr;
+       csk->daddr.sin_port = daddr->sin_port;
+       csk->daddr.sin_family = daddr->sin_family;
+       csk->saddr.sin_family = daddr->sin_family;
+       /* source address chosen by the route lookup */
+       csk->saddr.sin_addr.s_addr = fl4.saddr;
+       neigh_release(n);
+
+       return csk;
+
+rel_neigh:
+       neigh_release(n);
+
+rel_rt:
+       ip_rt_put(rt);
+       if (csk)
+               cxgbi_sock_closed(csk);
+err_out:
+       return ERR_PTR(err);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/*
+ * Look up the IPv6 output route in init_net.
+ * @saddr: source address to route from, or NULL to leave it unspecified
+ * @daddr: destination address to route to, or NULL to leave it unspecified
+ *
+ * Returns the result of ip6_route_output() cast to struct rt6_info.
+ */
+static struct rt6_info *find_route_ipv6(const struct in6_addr *saddr,
+                                       const struct in6_addr *daddr)
+{
+       struct flowi6 fl;
+
+       /*
+        * Start from an all-zero flow key: only the addresses are filled in
+        * below, and every other field would otherwise hold stack garbage
+        * that the route lookup reads.
+        */
+       memset(&fl, 0, sizeof(fl));
+       if (saddr)
+               memcpy(&fl.saddr, saddr, sizeof(struct in6_addr));
+       if (daddr)
+               memcpy(&fl.daddr, daddr, sizeof(struct in6_addr));
+       return (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+}
+
+/*
+ * cxgbi_check_route6 - find the IPv6 route to a target and create a socket
+ * @dst_addr: destination address, interpreted as a struct sockaddr_in6
+ *
+ * IPv6 counterpart of cxgbi_check_route(): resolves route and neighbour,
+ * maps the outgoing net_device to a cxgbi device and port (by net_device
+ * first, then by hardware address), allocates a socket and fills in its
+ * destination and preferred source address.
+ *
+ * Returns the new socket, or an ERR_PTR() on failure.
+ */
+static struct cxgbi_sock *cxgbi_check_route6(struct sockaddr *dst_addr)
+{
+       struct sockaddr_in6 *daddr6 = (struct sockaddr_in6 *)dst_addr;
+       struct dst_entry *dst;
+       struct net_device *ndev;
+       struct cxgbi_device *cdev;
+       struct rt6_info *rt = NULL;
+       struct neighbour *n;
+       struct in6_addr pref_saddr;
+       struct cxgbi_sock *csk = NULL;
+       unsigned int mtu = 0;
+       int port = 0xFFFF;
+       int err = 0;
+
+       rt = find_route_ipv6(NULL, &daddr6->sin6_addr);
+
+       /*
+        * NOTE(review): only a NULL result is treated as "no route" here;
+        * confirm whether ip6_route_output() can instead hand back an entry
+        * that carries an error.
+        */
+       if (!rt) {
+               pr_info("no route to ipv6 %pI6 port %u\n",
+                       daddr6->sin6_addr.s6_addr,
+                       be16_to_cpu(daddr6->sin6_port));
+               err = -ENETUNREACH;
+               goto err_out;
+       }
+
+       dst = &rt->dst;
+
+       n = dst_neigh_lookup(dst, &daddr6->sin6_addr);
+
+       if (!n) {
+               pr_info("%pI6, port %u, dst no neighbour.\n",
+                       daddr6->sin6_addr.s6_addr,
+                       be16_to_cpu(daddr6->sin6_port));
+               err = -ENETUNREACH;
+               goto rel_rt;
+       }
+       ndev = n->dev;
+
+       if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
+               pr_info("multi-cast route %pI6 port %u, dev %s.\n",
+                       daddr6->sin6_addr.s6_addr,
+                       ntohs(daddr6->sin6_port), ndev->name);
+               err = -ENETUNREACH;
+               goto rel_rt;
+       }
+
+       /* match by net_device first, then fall back to the hardware address */
+       cdev = cxgbi_device_find_by_netdev(ndev, &port);
+       if (!cdev)
+               cdev = cxgbi_device_find_by_mac(ndev, &port);
+       if (!cdev) {
+               pr_info("dst %pI6 %s, NOT cxgbi device.\n",
+                       daddr6->sin6_addr.s6_addr, ndev->name);
+               err = -ENETUNREACH;
+               goto rel_rt;
+       }
+       log_debug(1 << CXGBI_DBG_SOCK,
+                 "route to %pI6 :%u, ndev p#%d,%s, cdev 0x%p.\n",
+                 daddr6->sin6_addr.s6_addr, ntohs(daddr6->sin6_port), port,
+                 ndev->name, cdev);
+
+       csk = cxgbi_sock_create(cdev);
+       if (!csk) {
+               err = -ENOMEM;
+               goto rel_rt;
+       }
+       csk->cdev = cdev;
+       csk->port_id = port;
+       /* mtu is never changed from its initial 0 in this function */
+       csk->mtu = mtu;
+       csk->dst = dst;
+
+       /* no preferred source on the route: ask the stack to select one */
+       if (ipv6_addr_any(&rt->rt6i_prefsrc.addr)) {
+               struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
+
+               err = ipv6_dev_get_saddr(&init_net, idev ? idev->dev : NULL,
+                                        &daddr6->sin6_addr, 0, &pref_saddr);
+               if (err) {
+                       pr_info("failed to get source address to reach %pI6\n",
+                               &daddr6->sin6_addr);
+                       /*
+                        * NOTE(review): csk is non-NULL on this path and is
+                        * still in CTP_CLOSED, for which cxgbi_sock_closed()
+                        * returns early; the socket looks like it is never
+                        * freed, and csk->dst still points at the route that
+                        * is released below - confirm.
+                        */
+                       goto rel_rt;
+               }
+       } else {
+               pref_saddr = rt->rt6i_prefsrc.addr;
+       }
+
+       csk->csk_family = AF_INET6;
+       csk->daddr6.sin6_addr = daddr6->sin6_addr;
+       csk->daddr6.sin6_port = daddr6->sin6_port;
+       csk->daddr6.sin6_family = daddr6->sin6_family;
+       csk->saddr6.sin6_family = daddr6->sin6_family;
+       csk->saddr6.sin6_addr = pref_saddr;
+
+       neigh_release(n);
+       return csk;
+
+rel_rt:
+       /* n is NULL when the neighbour lookup itself failed */
+       if (n)
+               neigh_release(n);
+
+       ip6_rt_put(rt);
+       if (csk)
+               cxgbi_sock_closed(csk);
+err_out:
+       return ERR_PTR(err);
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+/*
+ * cxgbi_sock_established - move a socket to the established state
+ * @csk: socket whose connection has been set up
+ * @snd_isn: initial send sequence number
+ * @opt: not used by this function
+ *
+ * The send sequence counters all start at @snd_isn, the route is confirmed
+ * and, after a full memory barrier, the state becomes CTP_ESTABLISHED.
+ */
+void cxgbi_sock_established(struct cxgbi_sock *csk, unsigned int snd_isn,
+                       unsigned int opt)
+{
+       csk->snd_una = snd_isn;
+       csk->snd_nxt = snd_isn;
+       csk->write_seq = snd_isn;
+
+       dst_confirm(csk->dst);
+
+       /* order the updates above before the state change */
+       smp_mb();
+       cxgbi_sock_set_state(csk, CTP_ESTABLISHED);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_established);
+
+/*
+ * Report a connection failure (ISCSI_ERR_TCP_CONN_CLOSE) to the iSCSI
+ * connection stored in csk->user_data, unless the socket is still in the
+ * established state or has no connection attached.  user_data is read with
+ * callback_lock held.
+ */
+static void cxgbi_inform_iscsi_conn_closing(struct cxgbi_sock *csk)
+{
+       log_debug(1 << CXGBI_DBG_SOCK,
+               "csk 0x%p, state %u, flags 0x%lx, conn 0x%p.\n",
+               csk, csk->state, csk->flags, csk->user_data);
+
+       if (csk->state == CTP_ESTABLISHED)
+               return;
+
+       read_lock_bh(&csk->callback_lock);
+       if (csk->user_data)
+               iscsi_conn_failure(csk->user_data, ISCSI_ERR_TCP_CONN_CLOSE);
+       read_unlock_bh(&csk->callback_lock);
+}
+
+/*
+ * cxgbi_sock_closed - release a socket's resources and mark it closed
+ * @csk: socket being closed
+ *
+ * Always sets CTPF_ACTIVE_CLOSE_NEEDED.  Nothing else is done while the
+ * socket is in CTP_ACTIVE_OPEN or already in CTP_CLOSED.  Otherwise the
+ * source port, the route and the offload resources are released, the state
+ * becomes CTP_CLOSED, the iSCSI connection is informed and one socket
+ * reference is dropped.
+ */
+void cxgbi_sock_closed(struct cxgbi_sock *csk)
+{
+       log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
+               csk, (csk)->state, (csk)->flags, (csk)->tid);
+       cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED);
+       if (csk->state == CTP_ACTIVE_OPEN || csk->state == CTP_CLOSED)
+               return;
+       /*
+        * NOTE(review): the IPv4 port field is tested for both address
+        * families; presumably saddr and saddr6 overlay each other so that
+        * this also covers sin6_port - confirm in libcxgbi.h.
+        */
+       if (csk->saddr.sin_port)
+               sock_put_port(csk);
+       if (csk->dst)
+               dst_release(csk->dst);
+       csk->cdev->csk_release_offload_resources(csk);
+       /* the state must be CTP_CLOSED before the connection is informed */
+       cxgbi_sock_set_state(csk, CTP_CLOSED);
+       cxgbi_inform_iscsi_conn_closing(csk);
+       cxgbi_sock_put(csk);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_closed);
+
+/*
+ * Start closing a connection from our side.
+ *
+ * Under the socket lock the receive queue is purged and the state advanced:
+ * an opening socket is only flagged CTPF_ACTIVE_CLOSE_NEEDED, an established
+ * one goes to CTP_ACTIVE_CLOSE and a passively closing one to
+ * CTP_CLOSE_WAIT_2.  For the latter two a request is sent to the hardware:
+ * an abort if received data had to be thrown away, a close otherwise.
+ */
+static void need_active_close(struct cxgbi_sock *csk)
+{
+       int rx_pending;
+       int send_req = 0;
+
+       log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
+               csk, csk->state, csk->flags, csk->tid);
+
+       spin_lock_bh(&csk->lock);
+       dst_confirm(csk->dst);
+       rx_pending = skb_queue_len(&csk->receive_queue);
+       __skb_queue_purge(&csk->receive_queue);
+
+       switch (csk->state) {
+       case CTP_ACTIVE_OPEN:
+               cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED);
+               break;
+       case CTP_ESTABLISHED:
+               send_req = 1;
+               cxgbi_sock_set_state(csk, CTP_ACTIVE_CLOSE);
+               break;
+       case CTP_PASSIVE_CLOSE:
+               send_req = 1;
+               cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2);
+               break;
+       default:
+               break;
+       }
+
+       if (send_req && rx_pending)
+               csk->cdev->csk_send_abort_req(csk);
+       else if (send_req)
+               csk->cdev->csk_send_close_req(csk);
+
+       spin_unlock_bh(&csk->lock);
+}
+
+/*
+ * cxgbi_sock_fail_act_open - fail an active open and close the socket
+ * @csk: socket whose connection attempt failed
+ * @errno: error code recorded in csk->err
+ *
+ * The state is first set to CTP_CONNECTING so that cxgbi_sock_closed() does
+ * not return early (it does for CTP_ACTIVE_OPEN) and runs the full cleanup.
+ */
+void cxgbi_sock_fail_act_open(struct cxgbi_sock *csk, int errno)
+{
+       /* the ports are kept in network byte order: convert them for logging */
+       pr_info("csk 0x%p,%u,%lx, %pI4:%u-%pI4:%u, err %d.\n",
+                       csk, csk->state, csk->flags,
+                       &csk->saddr.sin_addr.s_addr,
+                       ntohs(csk->saddr.sin_port),
+                       &csk->daddr.sin_addr.s_addr,
+                       ntohs(csk->daddr.sin_port),
+                       errno);
+
+       cxgbi_sock_set_state(csk, CTP_CONNECTING);
+       csk->err = errno;
+       cxgbi_sock_closed(csk);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_fail_act_open);
+
+/*
+ * cxgbi_sock_act_open_req_arp_failure - handle a failed open request skb
+ * @handle: not used by this function
+ * @skb: the request skb; its sk field carries the cxgbi socket
+ *
+ * If the socket is still in CTP_ACTIVE_OPEN the open is failed with
+ * -EHOSTUNREACH.  The skb is always freed.
+ */
+void cxgbi_sock_act_open_req_arp_failure(void *handle, struct sk_buff *skb)
+{
+       struct cxgbi_sock *csk;
+
+       csk = (struct cxgbi_sock *)skb->sk;
+       log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
+               csk, csk->state, csk->flags, csk->tid);
+
+       /* keep the socket alive across the close that may happen below */
+       cxgbi_sock_get(csk);
+
+       spin_lock_bh(&csk->lock);
+       if (csk->state == CTP_ACTIVE_OPEN)
+               cxgbi_sock_fail_act_open(csk, -EHOSTUNREACH);
+       spin_unlock_bh(&csk->lock);
+
+       cxgbi_sock_put(csk);
+       __kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_act_open_req_arp_failure);
+
+/*
+ * cxgbi_sock_rcv_abort_rpl - handle an abort reply for a socket
+ * @csk: socket the reply belongs to
+ *
+ * Records CTPF_ABORT_RPL_RCVD.  If a reply was pending, the pending flag is
+ * cleared and the socket is closed; an abort request received in the
+ * meantime is reported as an error first.
+ */
+void cxgbi_sock_rcv_abort_rpl(struct cxgbi_sock *csk)
+{
+       cxgbi_sock_get(csk);
+       spin_lock_bh(&csk->lock);
+
+       cxgbi_sock_set_flag(csk, CTPF_ABORT_RPL_RCVD);
+       if (!cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING))
+               goto unlock;
+
+       cxgbi_sock_clear_flag(csk, CTPF_ABORT_RPL_PENDING);
+       if (cxgbi_sock_flag(csk, CTPF_ABORT_REQ_RCVD))
+               pr_err("csk 0x%p,%u,0x%lx,%u,ABT_RPL_RSS.\n",
+                      csk, csk->state, csk->flags, csk->tid);
+       cxgbi_sock_closed(csk);
+
+unlock:
+       spin_unlock_bh(&csk->lock);
+       cxgbi_sock_put(csk);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_abort_rpl);
+
+/*
+ * cxgbi_sock_rcv_peer_close - handle a close initiated by the peer
+ * @csk: socket the peer is closing
+ *
+ * Ignored while an abort reply is pending.  Otherwise the state machine is
+ * advanced (or the socket closed when it was in CTP_CLOSE_WAIT_1) and the
+ * iSCSI connection is informed.  Runs under the socket lock with an extra
+ * reference held.
+ */
+void cxgbi_sock_rcv_peer_close(struct cxgbi_sock *csk)
+{
+       log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
+               csk, (csk)->state, (csk)->flags, (csk)->tid);
+       cxgbi_sock_get(csk);
+       spin_lock_bh(&csk->lock);
+
+       if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING))
+               goto done;
+
+       switch (csk->state) {
+       case CTP_ESTABLISHED:
+               cxgbi_sock_set_state(csk, CTP_PASSIVE_CLOSE);
+               break;
+       case CTP_ACTIVE_CLOSE:
+               cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2);
+               break;
+       case CTP_CLOSE_WAIT_1:
+               cxgbi_sock_closed(csk);
+               break;
+       case CTP_ABORTING:
+               /* nothing to do in this state */
+               break;
+       default:
+               pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n",
+                       csk, csk->state, csk->flags, csk->tid);
+       }
+       /* reached for every state above, including the "bad state" case */
+       cxgbi_inform_iscsi_conn_closing(csk);
+done:
+       spin_unlock_bh(&csk->lock);
+       cxgbi_sock_put(csk);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_peer_close);
+
+/*
+ * cxgbi_sock_rcv_close_conn_rpl - handle the reply to a close request.
+ * @csk:     connection being closed
+ * @snd_nxt: sequence number reported with the reply
+ *
+ * csk->snd_una is set to @snd_nxt - 1 unconditionally.  Unless an abort
+ * reply is pending, the socket state is then advanced according to the
+ * current state; an unexpected state is only logged.
+ * Runs under csk->lock with an extra reference held on @csk.
+ */
+void cxgbi_sock_rcv_close_conn_rpl(struct cxgbi_sock *csk, u32 snd_nxt)
+{
+       log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n",
+                 csk, csk->state, csk->flags, csk->tid);
+       cxgbi_sock_get(csk);
+       spin_lock_bh(&csk->lock);
+
+       csk->snd_una = snd_nxt - 1;
+
+       if (!cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING)) {
+               switch (csk->state) {
+               case CTP_ACTIVE_CLOSE:
+                       cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_1);
+                       break;
+               case CTP_CLOSE_WAIT_1:
+               case CTP_CLOSE_WAIT_2:
+                       cxgbi_sock_closed(csk);
+                       break;
+               case CTP_ABORTING:
+                       /* nothing to change while aborting */
+                       break;
+               default:
+                       pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n",
+                              csk, csk->state, csk->flags, csk->tid);
+               }
+       }
+
+       spin_unlock_bh(&csk->lock);
+       cxgbi_sock_put(csk);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_close_conn_rpl);
+
+/*
+ * cxgbi_sock_rcv_wr_ack - handle a work-request (tx credit) acknowledgement.
+ * @csk:     connection the ack belongs to
+ * @credits: number of tx credits being returned
+ * @snd_una: send-unacknowledged sequence number reported with the ack
+ * @seq_chk: when non-zero, validate @snd_una and update csk->snd_una
+ *
+ * Returns @credits to csk->wr_cred, frees the pending work-request skbs that
+ * the credits fully cover, optionally updates csk->snd_una, and then either
+ * pushes more queued tx frames or reopens the iscsi tx path.
+ * The whole operation runs under csk->lock.
+ */
+void cxgbi_sock_rcv_wr_ack(struct cxgbi_sock *csk, unsigned int credits,
+                          unsigned int snd_una, int seq_chk)
+{
+       log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK,
+                       "csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, snd_una %u,%d.\n",
+                       csk, csk->state, csk->flags, csk->tid, credits,
+                       csk->wr_cred, csk->wr_una_cred, snd_una, seq_chk);
+
+       spin_lock_bh(&csk->lock);
+
+       csk->wr_cred += credits;
+       /* unacked credits can never exceed what is still outstanding */
+       if (csk->wr_una_cred > csk->wr_max_cred - csk->wr_cred)
+               csk->wr_una_cred = csk->wr_max_cred - csk->wr_cred;
+
+       /* skb->csum is used here as the credit count of a pending WR */
+       while (credits) {
+               struct sk_buff *p = cxgbi_sock_peek_wr(csk);
+
+               if (unlikely(!p)) {
+                       pr_err("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, empty.\n",
+                               csk, csk->state, csk->flags, csk->tid, credits,
+                               csk->wr_cred, csk->wr_una_cred);
+                       break;
+               }
+
+               if (unlikely(credits < p->csum)) {
+                       /* partial ack: keep the WR queued with the remainder */
+                       pr_warn("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, < %u.\n",
+                               csk, csk->state, csk->flags, csk->tid,
+                               credits, csk->wr_cred, csk->wr_una_cred,
+                               p->csum);
+                       p->csum -= credits;
+                       break;
+               } else {
+                       cxgbi_sock_dequeue_wr(csk);
+                       credits -= p->csum;
+                       kfree_skb(p);
+               }
+       }
+
+       cxgbi_sock_check_wr_invariants(csk);
+
+       if (seq_chk) {
+               /* an ack that moves snd_una backwards is ignored */
+               if (unlikely(before(snd_una, csk->snd_una))) {
+                       pr_warn("csk 0x%p,%u,0x%lx,%u, snd_una %u/%u.\n",
+                               csk, csk->state, csk->flags, csk->tid, snd_una,
+                               csk->snd_una);
+                       goto done;
+               }
+
+               if (csk->snd_una != snd_una) {
+                       csk->snd_una = snd_una;
+                       dst_confirm(csk->dst);
+               }
+       }
+
+       /* reopen tx when nothing is queued or the push reports non-zero */
+       if (skb_queue_len(&csk->write_queue)) {
+               if (csk->cdev->csk_push_tx_frames(csk, 0))
+                       cxgbi_conn_tx_open(csk);
+       } else
+               cxgbi_conn_tx_open(csk);
+done:
+       spin_unlock_bh(&csk->lock);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_wr_ack);
+
+/*
+ * Return the index of the last entry in cdev->mtus[] that does not exceed
+ * @mtu.  Index 0 is returned when even mtus[1] is larger than @mtu; the scan
+ * relies on mtus[] being sorted in ascending order - TODO confirm.
+ */
+static unsigned int cxgbi_sock_find_best_mtu(struct cxgbi_sock *csk,
+                                            unsigned short mtu)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       int idx;
+
+       for (idx = 0; idx < cdev->nmtus - 1; idx++) {
+               if (cdev->mtus[idx + 1] > mtu)
+                       break;
+       }
+
+       return idx;
+}
+
+/*
+ * cxgbi_sock_select_mss - pick the advertised mss and its hw mtu index.
+ * @csk:  connection; csk->advmss is updated
+ * @pmtu: path mtu
+ *
+ * csk->advmss starts from the route's advmss metric and is clamped to the
+ * range [mtus[0] - 40, @pmtu - 40].  Returns the index into cdev->mtus[]
+ * chosen by cxgbi_sock_find_best_mtu() for advmss + 40.
+ * NOTE(review): 40 is presumably the IPv4 + TCP header size - confirm.
+ */
+unsigned int cxgbi_sock_select_mss(struct cxgbi_sock *csk, unsigned int pmtu)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+
+       csk->advmss = dst_metric_advmss(csk->dst);
+
+       if (csk->advmss > pmtu - 40)
+               csk->advmss = pmtu - 40;
+       if (csk->advmss < cdev->mtus[0] - 40)
+               csk->advmss = cdev->mtus[0] - 40;
+
+       return cxgbi_sock_find_best_mtu(csk, csk->advmss + 40);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_select_mss);
+
+/*
+ * cxgbi_sock_skb_entail - append @skb to the connection's write queue.
+ *
+ * The skb is stamped with the current csk->write_seq before it is queued.
+ * The unlocked __skb_queue_tail() is used, so the caller has to provide the
+ * serialisation (cxgbi_sock_send_pdus() calls this under csk->lock).
+ */
+void cxgbi_sock_skb_entail(struct cxgbi_sock *csk, struct sk_buff *skb)
+{
+       cxgbi_skcb_tcp_seq(skb) = csk->write_seq;
+       __skb_queue_tail(&csk->write_queue, skb);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_skb_entail);
+
+/*
+ * cxgbi_sock_purge_wr_queue - drop every pending work-request skb of @csk.
+ *
+ * Dequeues until cxgbi_sock_dequeue_wr() returns NULL, freeing each skb.
+ */
+void cxgbi_sock_purge_wr_queue(struct cxgbi_sock *csk)
+{
+       struct sk_buff *wr_skb;
+
+       for (;;) {
+               wr_skb = cxgbi_sock_dequeue_wr(csk);
+               if (!wr_skb)
+                       break;
+               kfree_skb(wr_skb);
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_purge_wr_queue);
+
+/*
+ * cxgbi_sock_check_wr_invariants - sanity check the tx credit accounting.
+ *
+ * Available credits plus the count reported by
+ * cxgbi_sock_count_pending_wrs() must equal csk->wr_max_cred; a mismatch is
+ * logged as an error and nothing else is done about it.
+ */
+void cxgbi_sock_check_wr_invariants(const struct cxgbi_sock *csk)
+{
+       int in_flight = cxgbi_sock_count_pending_wrs(csk);
+
+       if (likely(csk->wr_cred + in_flight == csk->wr_max_cred))
+               return;
+
+       pr_err("csk 0x%p, tid %u, credit %u + %u != %u.\n",
+              csk, csk->tid, csk->wr_cred, in_flight, csk->wr_max_cred);
+}
+EXPORT_SYMBOL_GPL(cxgbi_sock_check_wr_invariants);
+
+/*
+ * cxgbi_sock_send_pdus - queue a chain of pdu skbs for transmission.
+ * @csk: connection to send on
+ * @skb: first skb of a chain linked through skb->next
+ *
+ * Each skb is unlinked from the chain, flagged SKCBF_TX_NEED_HDR and appended
+ * to csk->write_queue; csk->write_seq advances by the skb length plus the
+ * ulp extra length.  Queued frames are then pushed to the hardware.
+ *
+ * Returns the sum of skb->len of the queued skbs, or an error:
+ *   -EAGAIN   the connection is not in CTP_ESTABLISHED
+ *   csk->err  the connection already recorded an error
+ *   -ENOBUFS  the send window (cdev->snd_win) is full
+ *   -EINVAL   an skb has too little headroom or too many fragments
+ *
+ * Note that the error path joins the normal exit: if an skb in the middle
+ * of the chain is rejected, the skbs queued before it stay queued and are
+ * still pushed, while the return value reports only the error.
+ */
+static int cxgbi_sock_send_pdus(struct cxgbi_sock *csk, struct sk_buff *skb)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       struct sk_buff *next;
+       int err, copied = 0;
+
+       spin_lock_bh(&csk->lock);
+
+       if (csk->state != CTP_ESTABLISHED) {
+               log_debug(1 << CXGBI_DBG_PDU_TX,
+                       "csk 0x%p,%u,0x%lx,%u, EAGAIN.\n",
+                       csk, csk->state, csk->flags, csk->tid);
+               err = -EAGAIN;
+               goto out_err;
+       }
+
+       if (csk->err) {
+               log_debug(1 << CXGBI_DBG_PDU_TX,
+                       "csk 0x%p,%u,0x%lx,%u, EPIPE %d.\n",
+                       csk, csk->state, csk->flags, csk->tid, csk->err);
+               err = -EPIPE;
+               goto out_err;
+       }
+
+       /* bytes queued but not yet acked have filled the send window */
+       if (csk->write_seq - csk->snd_una >= cdev->snd_win) {
+               log_debug(1 << CXGBI_DBG_PDU_TX,
+                       "csk 0x%p,%u,0x%lx,%u, FULL %u-%u >= %u.\n",
+                       csk, csk->state, csk->flags, csk->tid, csk->write_seq,
+                       csk->snd_una, cdev->snd_win);
+               err = -ENOBUFS;
+               goto out_err;
+       }
+
+       while (skb) {
+               /* page fragments, plus one when the skb has linear data */
+               int frags = skb_shinfo(skb)->nr_frags +
+                               (skb->len != skb->data_len);
+
+               if (unlikely(skb_headroom(skb) < cdev->skb_tx_rsvd)) {
+                       pr_err("csk 0x%p, skb head %u < %u.\n",
+                               csk, skb_headroom(skb), cdev->skb_tx_rsvd);
+                       err = -EINVAL;
+                       goto out_err;
+               }
+
+               if (frags >= SKB_WR_LIST_SIZE) {
+                       pr_err("csk 0x%p, frags %d, %u,%u >%u.\n",
+                               csk, skb_shinfo(skb)->nr_frags, skb->len,
+                               skb->data_len, (uint)(SKB_WR_LIST_SIZE));
+                       err = -EINVAL;
+                       goto out_err;
+               }
+
+               /* detach this skb from the chain before queueing it */
+               next = skb->next;
+               skb->next = NULL;
+               cxgbi_skcb_set_flag(skb, SKCBF_TX_NEED_HDR);
+               cxgbi_sock_skb_entail(csk, skb);
+               copied += skb->len;
+               csk->write_seq += skb->len +
+                               cxgbi_ulp_extra_len(cxgbi_skcb_ulp_mode(skb));
+               skb = next;
+       }
+done:
+       if (likely(skb_queue_len(&csk->write_queue)))
+               cdev->csk_push_tx_frames(csk, 1);
+       spin_unlock_bh(&csk->lock);
+       return copied;
+
+out_err:
+       /* report the connection's own error code when one is recorded */
+       if (copied == 0 && err == -EPIPE)
+               copied = csk->err ? csk->err : -EPIPE;
+       else
+               copied = err;
+       goto done;
+}
+
+/*
+ * Direct Data Placement -
+ * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted
+ * final destination host-memory buffers based on the Initiator Task Tag (ITT)
+ * in Data-In or Target Task Tag (TTT) in Data-Out PDUs.
+ * The host memory address is programmed into h/w in the format of pagepod
+ * entries.
+ * The location of the pagepod entry is encoded into ddp tag which is used as
+ * the base for ITT/TTT.
+ */
+
+/*
+ * Page size factor per ddp page index; the defaults equal the matching
+ * ddp_page_shift[] entry minus 12.  Exported to the hw drivers through
+ * cxgbi_ddp_page_size_factor().
+ */
+static unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4};
+/* log2 of each ddp page size; defaults are 4KB, 8KB, 16KB and 64KB */
+static unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16};
+/*
+ * Index of the host PAGE_SIZE in the tables above.  DDP_PGIDX_MAX means
+ * "not determined (yet)"; cxgbi_ddp_reserve() refuses ddp in that case.
+ */
+static unsigned char page_idx = DDP_PGIDX_MAX;
+
+/*
+ * Their sum becomes cdev->tag_format.sw_bits in cxgbi_ddp_init().
+ * NOTE(review): no assignment is visible in this part of the file, so both
+ * are zero unless they are set elsewhere - confirm.
+ */
+static unsigned char sw_tag_idx_bits;
+static unsigned char sw_tag_age_bits;
+
+/*
+ * Direct-Data Placement page size adjustment
+ */
+/*
+ * Rebuild ddp_page_order[]/ddp_page_shift[] around the host page size:
+ * entry 0 becomes PAGE_SIZE and every following entry doubles the previous
+ * one.  Returns 0 on success, or -EINVAL when PAGE_SIZE is smaller than the
+ * current smallest ddp page size.
+ */
+static int ddp_adjust_page_table(void)
+{
+       unsigned int base_order, order;
+       int idx = 0;
+
+       if (PAGE_SIZE < (1UL << ddp_page_shift[0])) {
+               pr_info("PAGE_SIZE 0x%lx too small, min 0x%lx\n",
+                       PAGE_SIZE, 1UL << ddp_page_shift[0]);
+               return -EINVAL;
+       }
+
+       /* both orders are taken before the tables are overwritten */
+       base_order = get_order(1UL << ddp_page_shift[0]);
+       order = get_order(1UL << PAGE_SHIFT);
+
+       while (idx < DDP_PGIDX_MAX) {
+               ddp_page_order[idx] = order - base_order + idx;
+               ddp_page_shift[idx] = PAGE_SHIFT + idx;
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * Look up @pgsz in ddp_page_shift[].  Returns the matching index, or
+ * DDP_PGIDX_MAX (after logging) when the size is not in the table.
+ */
+static int ddp_find_page_index(unsigned long pgsz)
+{
+       int idx = 0;
+
+       while (idx < DDP_PGIDX_MAX && pgsz != (1UL << ddp_page_shift[idx]))
+               idx++;
+
+       if (idx == DDP_PGIDX_MAX)
+               pr_info("ddp page size %lu not supported.\n", pgsz);
+
+       return idx;
+}
+
+/*
+ * Determine page_idx for the host PAGE_SIZE, once.  If PAGE_SIZE is not in
+ * the default table the table is rebuilt around it and the lookup is
+ * repeated; if the rebuild fails page_idx stays at DDP_PGIDX_MAX.
+ */
+static void ddp_setup_host_page_size(void)
+{
+       /* already determined by an earlier call */
+       if (page_idx != DDP_PGIDX_MAX)
+               return;
+
+       page_idx = ddp_find_page_index(PAGE_SIZE);
+       if (page_idx == DDP_PGIDX_MAX) {
+               pr_info("system PAGE %lu, update hw.\n", PAGE_SIZE);
+               if (ddp_adjust_page_table() < 0) {
+                       pr_info("PAGE %lu, disable ddp.\n", PAGE_SIZE);
+                       return;
+               }
+               page_idx = ddp_find_page_index(PAGE_SIZE);
+       }
+
+       pr_info("system PAGE %lu, ddp idx %u.\n", PAGE_SIZE, page_idx);
+}
+
+/*
+ * cxgbi_ddp_page_size_factor - copy the ddp page order table.
+ * @pgsz_factor: output array; DDP_PGIDX_MAX entries are written
+ */
+void cxgbi_ddp_page_size_factor(int *pgsz_factor)
+{
+       const unsigned char *order = ddp_page_order;
+       int *out = pgsz_factor;
+       int left;
+
+       for (left = DDP_PGIDX_MAX; left > 0; left--)
+               *out++ = *order++;
+}
+EXPORT_SYMBOL_GPL(cxgbi_ddp_page_size_factor);
+
+/*
+ * DDP setup & teardown
+ */
+
+/*
+ * cxgbi_ddp_ppod_set - fill in one pagepod.
+ * @ppod: pagepod to write
+ * @hdr:  header copied to the start of @ppod
+ * @gl:   gather list supplying the dma addresses
+ * @gidx: index of the first gather list element to use
+ *
+ * PPOD_PAGES_MAX + 1 address slots are written, in big-endian form; slots
+ * that lie beyond the end of the gather list are set to zero.
+ */
+void cxgbi_ddp_ppod_set(struct cxgbi_pagepod *ppod,
+                       struct cxgbi_pagepod_hdr *hdr,
+                       struct cxgbi_gather_list *gl, unsigned int gidx)
+{
+       int slot;
+
+       memcpy(ppod, hdr, sizeof(*hdr));
+
+       for (slot = 0; slot < (PPOD_PAGES_MAX + 1); slot++) {
+               unsigned int src = gidx + slot;
+
+               if (src < gl->nelem)
+                       ppod->addr[slot] = cpu_to_be64(gl->phys_addr[src]);
+               else
+                       ppod->addr[slot] = 0ULL;
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_set);
+
+/* cxgbi_ddp_ppod_clear - zero a whole pagepod (header and addresses). */
+void cxgbi_ddp_ppod_clear(struct cxgbi_pagepod *ppod)
+{
+       memset(ppod, 0, sizeof(*ppod));
+}
+EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_clear);
+
+/*
+ * ddp_find_unused_entries - claim @count consecutive free gl_map slots.
+ * @ddp:   ddp state owning gl_map
+ * @start: first index to consider
+ * @max:   end of the search range (exclusive)
+ * @count: number of consecutive slots needed
+ * @gl:    gather list stored in each claimed slot
+ *
+ * Searches [@start, @max) for a run of @count NULL entries and marks the
+ * run with @gl, all under ddp->map_lock.
+ *
+ * Returns the index of the first claimed slot, or -EBUSY when the range is
+ * too small or holds no suitable run.
+ */
+static inline int ddp_find_unused_entries(struct cxgbi_ddp_info *ddp,
+                                       unsigned int start, unsigned int max,
+                                       unsigned int count,
+                                       struct cxgbi_gather_list *gl)
+{
+       unsigned int i, j, k;
+
+       /*
+        * not enough entries; start > max is rejected explicitly so that
+        * the unsigned subtractions here and below cannot wrap around
+        */
+       if (start > max || (max - start) < count) {
+               log_debug(1 << CXGBI_DBG_DDP,
+                       "NOT enough entries %u+%u > %u.\n", start, count, max);
+               return -EBUSY;
+       }
+
+       /* from here on max is the last index where a run still fits */
+       max -= count;
+       spin_lock(&ddp->map_lock);
+       for (i = start; i <= max;) {
+               for (j = 0, k = i; j < count; j++, k++) {
+                       if (ddp->gl_map[k])
+                               break;
+               }
+               if (j == count) {
+                       for (j = 0, k = i; j < count; j++, k++)
+                               ddp->gl_map[k] = gl;
+                       spin_unlock(&ddp->map_lock);
+                       return i;
+               }
+               /* resume right behind the occupied slot that stopped us */
+               i += j + 1;
+       }
+       spin_unlock(&ddp->map_lock);
+       log_debug(1 << CXGBI_DBG_DDP,
+               "NO suitable entries %u available.\n", count);
+       return -EBUSY;
+}
+
+/*
+ * Release @count gl_map slots starting at @start by resetting them to NULL
+ * under ddp->map_lock.  The gather list itself is not freed here.
+ */
+static inline void ddp_unmark_entries(struct cxgbi_ddp_info *ddp,
+                                               int start, int count)
+{
+       spin_lock(&ddp->map_lock);
+       memset(&ddp->gl_map[start], 0,
+               count * sizeof(struct cxgbi_gather_list *));
+       spin_unlock(&ddp->map_lock);
+}
+
+/*
+ * Undo the dma mapping of the first gl->nelem pages of @gl.  Every page was
+ * mapped as a full PAGE_SIZE, device-to-host buffer.
+ */
+static inline void ddp_gl_unmap(struct pci_dev *pdev,
+                                       struct cxgbi_gather_list *gl)
+{
+       int i = 0;
+
+       while (i < gl->nelem) {
+               dma_unmap_page(&pdev->dev, gl->phys_addr[i], PAGE_SIZE,
+                              PCI_DMA_FROMDEVICE);
+               i++;
+       }
+}
+
+/*
+ * Dma-map every page of @gl for device-to-host transfers and record the
+ * addresses in gl->phys_addr[].
+ *
+ * Returns the number of mapped pages on success.  On a mapping error the
+ * pages mapped so far are unmapped again and -EINVAL is returned.
+ */
+static inline int ddp_gl_map(struct pci_dev *pdev,
+                                   struct cxgbi_gather_list *gl)
+{
+       unsigned int saved_nelem;
+       int i;
+
+       for (i = 0; i < gl->nelem; i++) {
+               gl->phys_addr[i] = dma_map_page(&pdev->dev, gl->pages[i], 0,
+                                               PAGE_SIZE, PCI_DMA_FROMDEVICE);
+               if (likely(!dma_mapping_error(&pdev->dev, gl->phys_addr[i])))
+                       continue;
+
+               log_debug(1 << CXGBI_DBG_DDP,
+                       "page %d 0x%p, 0x%p dma mapping err.\n",
+                       i, gl->pages[i], pdev);
+
+               if (i) {
+                       /* shrink nelem briefly so only mapped pages go */
+                       saved_nelem = gl->nelem;
+                       gl->nelem = i;
+                       ddp_gl_unmap(pdev, gl);
+                       gl->nelem = saved_nelem;
+               }
+               return -EINVAL;
+       }
+
+       return i;
+}
+
+/* Unmap all pages of a dma-mapped gather list and free the list itself. */
+static void ddp_release_gl(struct cxgbi_gather_list *gl,
+                                 struct pci_dev *pdev)
+{
+       ddp_gl_unmap(pdev, gl);
+       kfree(gl);
+}
+
+/*
+ * ddp_make_gl - build a dma-mapped gather list from a scatterlist.
+ * @xferlen: total transfer length in bytes
+ * @sgl:     scatterlist describing the destination buffer
+ * @sgcnt:   number of entries in @sgl
+ * @pdev:    pci device the pages are mapped for
+ * @gfp:     allocation flags for the gather list
+ *
+ * The gather list holds one entry per page.  Scatterlist entries that
+ * continue the previous entry inside the same page are merged into it.
+ *
+ * Returns the new list, or NULL when @xferlen is below DDP_THRESHOLD, the
+ * allocation fails, the scatterlist layout is not usable for ddp (see the
+ * checks in the loop), or the dma mapping fails.
+ */
+static struct cxgbi_gather_list *ddp_make_gl(unsigned int xferlen,
+                                                   struct scatterlist *sgl,
+                                                   unsigned int sgcnt,
+                                                   struct pci_dev *pdev,
+                                                   gfp_t gfp)
+{
+       struct cxgbi_gather_list *gl;
+       struct scatterlist *sg = sgl;
+       struct page *sgpage = sg_page(sg);
+       unsigned int sglen = sg->length;
+       unsigned int sgoffset = sg->offset;
+       /* upper bound on the pages touched, including the leading offset */
+       unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >>
+                               PAGE_SHIFT;
+       int i = 1, j = 0;
+
+       if (xferlen < DDP_THRESHOLD) {
+               log_debug(1 << CXGBI_DBG_DDP,
+                       "xfer %u < threshold %u, no ddp.\n",
+                       xferlen, DDP_THRESHOLD);
+               return NULL;
+       }
+
+       /* one allocation: header, npages dma addresses, npages page ptrs */
+       gl = kzalloc(sizeof(struct cxgbi_gather_list) +
+                    npages * (sizeof(dma_addr_t) +
+                    sizeof(struct page *)), gfp);
+       if (!gl) {
+               log_debug(1 << CXGBI_DBG_DDP,
+                       "xfer %u, %u pages, OOM.\n", xferlen, npages);
+               return NULL;
+       }
+
+        log_debug(1 << CXGBI_DBG_DDP,
+               "xfer %u, sgl %u, gl max %u.\n", xferlen, sgcnt, npages);
+
+       /* the page pointer array starts right behind phys_addr[npages] */
+       gl->pages = (struct page **)&gl->phys_addr[npages];
+       gl->nelem = npages;
+       gl->length = xferlen;
+       gl->offset = sgoffset;
+       gl->pages[0] = sgpage;
+
+       /* i walks the sg entries, j is the index of the current gl page */
+       for (i = 1, sg = sg_next(sgl), j = 0; i < sgcnt;
+               i++, sg = sg_next(sg)) {
+               struct page *page = sg_page(sg);
+
+               if (sgpage == page && sg->offset == sgoffset + sglen)
+                       sglen += sg->length;
+               else {
+                       /*  make sure the sgl is fit for ddp:
+                        *  each has the same page size, and
+                        *  all of the middle pages are used completely
+                        */
+                       if ((j && sgoffset) || ((i != sgcnt - 1) &&
+                           ((sglen + sgoffset) & ~PAGE_MASK))) {
+                               log_debug(1 << CXGBI_DBG_DDP,
+                                       "page %d/%u, %u + %u.\n",
+                                       i, sgcnt, sgoffset, sglen);
+                               goto error_out;
+                       }
+
+                       j++;
+                       /* too many pages, or a new page not starting at 0 */
+                       if (j == gl->nelem || sg->offset) {
+                               log_debug(1 << CXGBI_DBG_DDP,
+                                       "page %d/%u, offset %u.\n",
+                                       j, gl->nelem, sg->offset);
+                               goto error_out;
+                       }
+                       gl->pages[j] = page;
+                       sglen = sg->length;
+                       sgoffset = sg->offset;
+                       sgpage = page;
+               }
+       }
+       /* shrink nelem to the number of pages actually collected */
+       gl->nelem = ++j;
+
+       if (ddp_gl_map(pdev, gl) < 0)
+               goto error_out;
+
+       return gl;
+
+error_out:
+       kfree(gl);
+       return NULL;
+}
+
+/*
+ * ddp_tag_release - give back the ddp resources behind @tag.
+ *
+ * The pagepod index is extracted from @tag.  If it is in range and a
+ * non-empty gather list is registered there, the hw pagepods are cleared,
+ * the gl_map slots are released and the gather list is unmapped and freed.
+ * An out-of-range index or a missing/empty gather list is only logged.
+ */
+static void ddp_tag_release(struct cxgbi_hba *chba, u32 tag)
+{
+       struct cxgbi_device *cdev = chba->cdev;
+       struct cxgbi_ddp_info *ddp = cdev->ddp;
+       struct cxgbi_gather_list *gl;
+       unsigned int npods;
+       u32 idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask;
+
+       if (idx >= ddp->nppods) {
+               pr_warn("tag 0x%x, idx %u > max %u.\n", tag, idx, ddp->nppods);
+               return;
+       }
+
+       gl = ddp->gl_map[idx];
+       if (!gl || !gl->nelem) {
+               pr_warn("tag 0x%x, idx %u, gl 0x%p, %u.\n",
+                       tag, idx, gl, gl ? gl->nelem : 0);
+               return;
+       }
+
+       /* number of pagepods needed for gl->nelem pages, rounded up */
+       npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+       log_debug(1 << CXGBI_DBG_DDP,
+               "tag 0x%x, release idx %u, npods %u.\n",
+               tag, idx, npods);
+
+       cdev->csk_ddp_clear(chba, tag, idx, npods);
+       ddp_unmark_entries(ddp, idx, npods);
+       ddp_release_gl(gl, ddp->pdev);
+}
+
+/*
+ * ddp_tag_reserve - reserve pagepods for @gl and build the ddp tag.
+ * @csk:    connection the transfer belongs to
+ * @tid:    hw connection id written into the pagepod header
+ * @sw_tag: software tag to combine with the pagepod index
+ * @tagp:   receives the resulting ddp tag on success
+ * @gl:     dma-mapped gather list describing the buffer
+ * @gfp:    not used by this function
+ *
+ * Free gl_map slots are searched behind the most recent allocation first
+ * and then from the start of the map.  On success the pagepods are
+ * programmed through cdev->csk_ddp_set().
+ *
+ * Returns 0 on success, the negative search result when no slots are free,
+ * or the error from csk_ddp_set() (the slots are released again then).
+ */
+static int ddp_tag_reserve(struct cxgbi_sock *csk, unsigned int tid,
+                          u32 sw_tag, u32 *tagp, struct cxgbi_gather_list *gl,
+                          gfp_t gfp)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       struct cxgbi_ddp_info *ddp = cdev->ddp;
+       struct cxgbi_pagepod_hdr hdr;
+       unsigned int npods;
+       int idx, err;
+       u32 tag;
+
+       npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+
+       if (ddp->idx_last == ddp->nppods) {
+               /* idx_last == nppods: nothing allocated yet, scan it all */
+               idx = ddp_find_unused_entries(ddp, 0, ddp->nppods, npods, gl);
+       } else {
+               idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1,
+                                             ddp->nppods, npods, gl);
+               /* nothing behind the last allocation: retry from slot 0 */
+               if (idx < 0 && ddp->idx_last >= npods)
+                       idx = ddp_find_unused_entries(ddp, 0,
+                               min(ddp->idx_last + npods, ddp->nppods),
+                               npods, gl);
+       }
+
+       if (idx < 0) {
+               log_debug(1 << CXGBI_DBG_DDP,
+                       "xferlen %u, gl %u, npods %u NO DDP.\n",
+                       gl->length, gl->nelem, npods);
+               return idx;
+       }
+
+       tag = cxgbi_ddp_tag_base(&cdev->tag_format, sw_tag);
+       tag |= idx << PPOD_IDX_SHIFT;
+
+       hdr.rsvd = 0;
+       hdr.vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
+       hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
+       hdr.max_offset = htonl(gl->length);
+       hdr.page_offset = htonl(gl->offset);
+
+       err = cdev->csk_ddp_set(csk, &hdr, idx, npods, gl);
+       if (err < 0) {
+               ddp_unmark_entries(ddp, idx, npods);
+               return err;
+       }
+
+       ddp->idx_last = idx;
+       log_debug(1 << CXGBI_DBG_DDP,
+               "xfer %u, gl %u,%u, tid 0x%x, tag 0x%x->0x%x(%u,%u).\n",
+               gl->length, gl->nelem, gl->offset, tid, sw_tag, tag, idx,
+               npods);
+       *tagp = tag;
+       return 0;
+}
+
+/*
+ * cxgbi_ddp_reserve - set up direct data placement for one transfer.
+ * @csk:     connection the transfer belongs to
+ * @tagp:    receives the ddp tag on success
+ * @sw_tag:  software tag to embed in the ddp tag
+ * @xferlen: transfer length in bytes
+ * @sgl:     scatterlist of the destination buffer
+ * @sgcnt:   number of entries in @sgl
+ * @gfp:     allocation flags
+ *
+ * Returns 0 on success.  -EINVAL is returned when ddp is unavailable (no
+ * usable page index, no ddp state on the device, transfer shorter than
+ * DDP_THRESHOLD) or @sw_tag is not usable, -ENOMEM when no gather list
+ * could be built, otherwise the error from ddp_tag_reserve().
+ */
+int cxgbi_ddp_reserve(struct cxgbi_sock *csk, unsigned int *tagp,
+                       unsigned int sw_tag, unsigned int xferlen,
+                       struct scatterlist *sgl, unsigned int sgcnt, gfp_t gfp)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       struct cxgbi_gather_list *gl;
+       int err;
+
+       if (page_idx >= DDP_PGIDX_MAX || !cdev->ddp ||
+           xferlen < DDP_THRESHOLD) {
+               log_debug(1 << CXGBI_DBG_DDP,
+                       "pgidx %u, xfer %u, NO ddp.\n", page_idx, xferlen);
+               return -EINVAL;
+       }
+
+       if (!cxgbi_sw_tag_usable(&cdev->tag_format, sw_tag)) {
+               log_debug(1 << CXGBI_DBG_DDP,
+                       "sw_tag 0x%x NOT usable.\n", sw_tag);
+               return -EINVAL;
+       }
+
+       gl = ddp_make_gl(xferlen, sgl, sgcnt, cdev->pdev, gfp);
+       if (!gl)
+               return -ENOMEM;
+
+       err = ddp_tag_reserve(csk, csk->tid, sw_tag, tagp, gl, gfp);
+       if (err >= 0)
+               return err;
+
+       /* no tag was reserved, so the gather list is still ours to free */
+       ddp_release_gl(gl, cdev->pdev);
+       return err;
+}
+
+/*
+ * ddp_destroy - kref release callback for the per-device ddp state.
+ *
+ * Frees every gather list still registered in gl_map and then the ddp
+ * structure.  A gather list covers several consecutive slots, so the scan
+ * skips ahead by its pagepod count after freeing it.  The gather lists are
+ * freed with kfree() only; no dma unmapping is done here.
+ */
+static void ddp_destroy(struct kref *kref)
+{
+       struct cxgbi_ddp_info *ddp = container_of(kref,
+                                               struct cxgbi_ddp_info,
+                                               refcnt);
+       struct cxgbi_device *cdev = ddp->cdev;
+       struct cxgbi_gather_list *gl;
+       int npods;
+       int i;
+
+       pr_info("kref 0, destroy ddp 0x%p, cdev 0x%p.\n", ddp, cdev);
+
+       for (i = 0; i < ddp->nppods; ) {
+               gl = ddp->gl_map[i];
+               if (!gl) {
+                       i++;
+                       continue;
+               }
+
+               npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+               pr_info("cdev 0x%p, ddp %d + %d.\n", cdev, i, npods);
+               kfree(gl);
+               i += npods;
+       }
+
+       cxgbi_free_big_mem(ddp);
+}
+
+/*
+ * cxgbi_ddp_cleanup - detach the ddp state from @cdev and drop a reference.
+ *
+ * Returns the result of kref_put() on the ddp state, or 0 when the device
+ * had no ddp state attached.
+ */
+int cxgbi_ddp_cleanup(struct cxgbi_device *cdev)
+{
+       struct cxgbi_ddp_info *ddp = cdev->ddp;
+
+       log_debug(1 << CXGBI_DBG_DDP,
+               "cdev 0x%p, release ddp 0x%p.\n", cdev, ddp);
+
+       cdev->ddp = NULL;
+       if (!ddp)
+               return 0;
+
+       return kref_put(&ddp->refcnt, ddp_destroy);
+}
+EXPORT_SYMBOL_GPL(cxgbi_ddp_cleanup);
+
+/*
+ * cxgbi_ddp_init - allocate and initialise the ddp state of a device.
+ * @cdev:     device to set up; cdev->ddp, the tag format and the max
+ *            tx/rx payload sizes are written
+ * @llimit:   lower limit of the pagepod address range
+ * @ulimit:   upper limit of the pagepod address range
+ * @max_txsz: max tx packet size, capped at ULP2_MAX_PKT_SIZE
+ * @max_rxsz: max rx packet size, capped at ULP2_MAX_PKT_SIZE
+ *
+ * Returns 0 on success or -ENOMEM when the ddp state cannot be allocated.
+ */
+int cxgbi_ddp_init(struct cxgbi_device *cdev,
+                  unsigned int llimit, unsigned int ulimit,
+                  unsigned int max_txsz, unsigned int max_rxsz)
+{
+       struct cxgbi_ddp_info *ddp;
+       unsigned int ppmax, bits;
+
+       /*
+        * Number of pagepods in the range, then the index width (capped at
+        * PPOD_IDX_MAX_SIZE), then the pod count cut to 2^(bits - 1) - 1.
+        * NOTE(review): a range smaller than one pagepod gives ppmax == 0
+        * going into __ilog2_u32() - confirm callers never pass that.
+        */
+       ppmax = (ulimit - llimit + 1) >> PPOD_SIZE_SHIFT;
+       bits = __ilog2_u32(ppmax) + 1;
+       if (bits > PPOD_IDX_MAX_SIZE)
+               bits = PPOD_IDX_MAX_SIZE;
+       ppmax = (1 << (bits - 1)) - 1;
+
+       /* the per-pod pointer arrays are allocated behind the structure */
+       ddp = cxgbi_alloc_big_mem(sizeof(struct cxgbi_ddp_info) +
+                               ppmax * (sizeof(struct cxgbi_gather_list *) +
+                                        sizeof(struct sk_buff *)),
+                               GFP_KERNEL);
+       if (!ddp) {
+               pr_warn("cdev 0x%p, ddp ppmax %u OOM.\n", cdev, ppmax);
+               return -ENOMEM;
+       }
+       ddp->gl_map = (struct cxgbi_gather_list **)(ddp + 1);
+       cdev->ddp = ddp;
+
+       spin_lock_init(&ddp->map_lock);
+       kref_init(&ddp->refcnt);
+
+       ddp->cdev = cdev;
+       ddp->pdev = cdev->pdev;
+       ddp->llimit = llimit;
+       ddp->ulimit = ulimit;
+       ddp->max_txsz = min_t(unsigned int, max_txsz, ULP2_MAX_PKT_SIZE);
+       ddp->max_rxsz = min_t(unsigned int, max_rxsz, ULP2_MAX_PKT_SIZE);
+       ddp->nppods = ppmax;
+       /* idx_last == nppods tells ddp_tag_reserve() to scan from slot 0 */
+       ddp->idx_last = ppmax;
+       ddp->idx_bits = bits;
+       ddp->idx_mask = (1 << bits) - 1;
+       ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1;
+
+       cdev->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits;
+       cdev->tag_format.rsvd_bits = ddp->idx_bits;
+       cdev->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
+       cdev->tag_format.rsvd_mask = (1 << cdev->tag_format.rsvd_bits) - 1;
+
+       pr_info("%s tag format, sw %u, rsvd %u,%u, mask 0x%x.\n",
+               cdev->ports[0]->name, cdev->tag_format.sw_bits,
+               cdev->tag_format.rsvd_bits, cdev->tag_format.rsvd_shift,
+               cdev->tag_format.rsvd_mask);
+
+       /* payload limit: packet size minus the non-payload pdu bytes */
+       cdev->tx_max_size = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD,
+                               ddp->max_txsz - ISCSI_PDU_NONPAYLOAD_LEN);
+       cdev->rx_max_size = min_t(unsigned int, ULP2_MAX_PDU_PAYLOAD,
+                               ddp->max_rxsz - ISCSI_PDU_NONPAYLOAD_LEN);
+
+       log_debug(1 << CXGBI_DBG_DDP,
+               "%s max payload size: %u/%u, %u/%u.\n",
+               cdev->ports[0]->name, cdev->tx_max_size, ddp->max_txsz,
+               cdev->rx_max_size, ddp->max_rxsz);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxgbi_ddp_init);
+
+/*
+ * APIs interacting with open-iscsi libraries
+ */
+
+/*
+ * Zero-initialised 4-byte buffer.
+ * NOTE(review): presumably the source of pad bytes when a pdu payload is
+ * padded to a 4-byte boundary; its users are outside this part of the file.
+ */
+static unsigned char padding[4];
+
+/*
+ * task_release_itt - release the ddp resources tied to a task's itt.
+ * @task:    iscsi task that owned the tag
+ * @hdr_itt: itt as carried in the pdu header (big-endian)
+ *
+ * Only tasks with a scsi command that reads data (bidirectional or
+ * DMA_FROM_DEVICE) and whose tag is a ddp tag have anything to release.
+ */
+static void task_release_itt(struct iscsi_task *task, itt_t hdr_itt)
+{
+       struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct cxgbi_hba *chba = cconn->chba;
+       struct scsi_cmnd *sc = task->sc;
+       u32 tag = ntohl((__force u32)hdr_itt);
+
+       log_debug(1 << CXGBI_DBG_DDP,
+                  "cdev 0x%p, release tag 0x%x.\n", chba->cdev, tag);
+
+       if (!sc)
+               return;
+       if (!scsi_bidi_cmnd(sc) && sc->sc_data_direction != DMA_FROM_DEVICE)
+               return;
+       if (!cxgbi_is_ddp_tag(&chba->cdev->tag_format, tag))
+               return;
+
+       ddp_tag_release(chba, tag);
+}
+
+/*
+ * task_reserve_itt - build the itt to put on the wire for @task.
+ * @task:    iscsi task being sent
+ * @hdr_itt: receives the itt in big-endian order
+ *
+ * The software tag combines the session age and the task itt.  For scsi
+ * commands that read data a ddp tag is attempted; if that is not possible
+ * (or not applicable) a non-ddp tag is generated from the software tag.
+ *
+ * Always returns 0.
+ */
+static int task_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
+{
+       struct iscsi_conn *conn = task->conn;
+       struct iscsi_session *sess = conn->session;
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct cxgbi_hba *chba = cconn->chba;
+       struct scsi_cmnd *sc = task->sc;
+       u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
+       u32 tag = 0;
+       int err = -EINVAL;
+
+       if (sc && (scsi_bidi_cmnd(sc) ||
+                  sc->sc_data_direction == DMA_FROM_DEVICE)) {
+               struct scsi_data_buffer *sdb = scsi_in(sc);
+
+               err = cxgbi_ddp_reserve(cconn->cep->csk, &tag, sw_tag,
+                                       sdb->length, sdb->table.sgl,
+                                       sdb->table.nents, GFP_ATOMIC);
+               if (err < 0)
+                       log_debug(1 << CXGBI_DBG_DDP,
+                               "csk 0x%p, R task 0x%p, %u,%u, no ddp.\n",
+                               cconn->cep->csk, task, sdb->length,
+                               sdb->table.nents);
+       }
+
+       /* no ddp tag: fall back to a tag derived from sw_tag alone */
+       if (err < 0)
+               tag = cxgbi_set_non_ddp_tag(&chba->cdev->tag_format, sw_tag);
+
+       /* the itt needs to be sent in big-endian order */
+       *hdr_itt = (__force itt_t)htonl(tag);
+
+       log_debug(1 << CXGBI_DBG_DDP,
+               "cdev 0x%p, task 0x%p, 0x%x(0x%x,0x%x)->0x%x/0x%x.\n",
+               chba->cdev, task, sw_tag, task->itt, sess->age, tag, *hdr_itt);
+       return 0;
+}
+
+/*
+ * cxgbi_parse_pdu_itt - split a received itt into task index and age.
+ * @conn: iscsi connection the pdu arrived on
+ * @itt:  itt from the pdu header (big-endian)
+ * @idx:  if not NULL, receives the task index (low task_idx_bits bits)
+ * @age:  if not NULL, receives the session age
+ *
+ * The bits reserved for ddp are removed from the tag first.
+ */
+void cxgbi_parse_pdu_itt(struct iscsi_conn *conn, itt_t itt, int *idx, int *age)
+{
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct cxgbi_device *cdev = cconn->chba->cdev;
+       u32 tag = ntohl((__force u32) itt);
+       u32 sw_bits = cxgbi_tag_nonrsvd_bits(&cdev->tag_format, tag);
+
+       if (idx)
+               *idx = sw_bits & ((1 << cconn->task_idx_bits) - 1);
+
+       if (age)
+               *age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK;
+
+       log_debug(1 << CXGBI_DBG_DDP,
+               "cdev 0x%p, tag 0x%x/0x%x, -> 0x%x(0x%x,0x%x).\n",
+               cdev, tag, itt, sw_bits, idx ? *idx : 0xFFFFF,
+               age ? *age : 0xFF);
+}
+EXPORT_SYMBOL_GPL(cxgbi_parse_pdu_itt);
+
+/*
+ * cxgbi_conn_tx_open - let the iscsi layer resume transmitting on @csk.
+ *
+ * Queues the xmit work of the iscsi connection stored in csk->user_data;
+ * nothing happens when no connection is attached.
+ */
+void cxgbi_conn_tx_open(struct cxgbi_sock *csk)
+{
+       struct iscsi_conn *conn = csk->user_data;
+
+       if (!conn)
+               return;
+
+       log_debug(1 << CXGBI_DBG_SOCK,
+               "csk 0x%p, cid %d.\n", csk, conn->id);
+       iscsi_conn_queue_work(conn);
+}
+EXPORT_SYMBOL_GPL(cxgbi_conn_tx_open);
+
+/*
+ * pdu receive, interact with libiscsi_tcp
+ */
+/*
+ * read_pdu_skb - feed one skb to libiscsi_tcp and translate the status.
+ * @conn:      iscsi connection receiving the data
+ * @skb:       skb holding pdu data
+ * @offset:    offset into @skb to start reading at
+ * @offloaded: passed through to iscsi_tcp_recv_skb()
+ *
+ * Returns the byte count reported by iscsi_tcp_recv_skb() when the segment
+ * completed or the connection is suspended, otherwise a negative errno:
+ * -EIO on a connection error, -EFAULT when the skb ran out before the
+ * segment was done, -EINVAL for an unknown status.
+ */
+static inline int read_pdu_skb(struct iscsi_conn *conn,
+                              struct sk_buff *skb,
+                              unsigned int offset,
+                              int offloaded)
+{
+       int status = 0;
+       int bytes_read;
+
+       bytes_read = iscsi_tcp_recv_skb(conn, skb, offset, offloaded, &status);
+       switch (status) {
+       case ISCSI_TCP_CONN_ERR:
+               pr_info("skb 0x%p, off %u, %d, TCP_ERR.\n",
+                         skb, offset, offloaded);
+               return -EIO;
+       case ISCSI_TCP_SUSPENDED:
+               log_debug(1 << CXGBI_DBG_PDU_RX,
+                       "skb 0x%p, off %u, %d, TCP_SUSPEND, rc %d.\n",
+                       skb, offset, offloaded, bytes_read);
+               /* no transfer - just have caller flush queue */
+               return bytes_read;
+       case ISCSI_TCP_SKB_DONE:
+               pr_info("skb 0x%p, off %u, %d, TCP_SKB_DONE.\n",
+                       skb, offset, offloaded);
+               /*
+                * pdus should always fit in the skb and we should get
+                * segment done notification.
+                */
+               iscsi_conn_printk(KERN_ERR, conn, "Invalid pdu or skb.");
+               return -EFAULT;
+       case ISCSI_TCP_SEGMENT_DONE:
+               log_debug(1 << CXGBI_DBG_PDU_RX,
+                       "skb 0x%p, off %u, %d, TCP_SEG_DONE, rc %d.\n",
+                       skb, offset, offloaded, bytes_read);
+               return bytes_read;
+       default:
+               pr_info("skb 0x%p, off %u, %d, invalid status %d.\n",
+                       skb, offset, offloaded, status);
+               return -EINVAL;
+       }
+}
+
+/*
+ * skb_read_pdu_bhs - process the header part of a received pdu.
+ *
+ * The connection is failed and -EIO returned when libiscsi_tcp is not
+ * expecting a header segment, or when header digests are enabled and the
+ * skb is flagged with a header crc error.  Otherwise the skb is read from
+ * offset 0 as non-offloaded data and the result of read_pdu_skb() returned.
+ */
+static int skb_read_pdu_bhs(struct iscsi_conn *conn, struct sk_buff *skb)
+{
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+
+       log_debug(1 << CXGBI_DBG_PDU_RX,
+               "conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
+               conn, skb, skb->len, cxgbi_skcb_flags(skb));
+
+       if (!iscsi_tcp_recv_segment_is_hdr(tcp_conn)) {
+               pr_info("conn 0x%p, skb 0x%p, not hdr.\n", conn, skb);
+               iscsi_conn_failure(conn, ISCSI_ERR_PROTO);
+               return -EIO;
+       }
+
+       /* the crc error flag is only honoured when digests are enabled */
+       if (conn->hdrdgst_en &&
+           cxgbi_skcb_test_flag(skb, SKCBF_RX_HCRC_ERR)) {
+               pr_info("conn 0x%p, skb 0x%p, hcrc.\n", conn, skb);
+               iscsi_conn_failure(conn, ISCSI_ERR_HDR_DGST);
+               return -EIO;
+       }
+
+       return read_pdu_skb(conn, skb, 0, 0);
+}
+
+/*
+ * skb_read_pdu_data - hand the data part of a received PDU to the iSCSI
+ * receive state machine.
+ * @lskb:   skb whose control-block flags (data digest error, DDP'ed) apply
+ * @skb:    skb the payload is actually read from
+ * @offset: starting offset inside @skb
+ *
+ * Returns -EIO (after failing the connection) on a flagged data digest
+ * error, 0 when the state machine is already waiting for a header, and
+ * otherwise the result of read_pdu_skb().
+ */
+static int skb_read_pdu_data(struct iscsi_conn *conn, struct sk_buff *lskb,
+                            struct sk_buff *skb, unsigned int offset)
+{
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       int opcode = tcp_conn->in.hdr->opcode & ISCSI_OPCODE_MASK;
+       bool offloaded;
+
+       log_debug(1 << CXGBI_DBG_PDU_RX,
+               "conn 0x%p, skb 0x%p, len %u, flag 0x%lx.\n",
+               conn, skb, skb->len, cxgbi_skcb_flags(skb));
+
+       if (conn->datadgst_en &&
+           cxgbi_skcb_test_flag(lskb, SKCBF_RX_DCRC_ERR)) {
+               pr_info("conn 0x%p, skb 0x%p, dcrc 0x%lx.\n",
+                       conn, lskb, cxgbi_skcb_flags(lskb));
+               iscsi_conn_failure(conn, ISCSI_ERR_DATA_DGST);
+               return -EIO;
+       }
+
+       /* state machine is back at a header: there is no data to read */
+       if (iscsi_tcp_recv_segment_is_hdr(tcp_conn))
+               return 0;
+
+       /* header and data share one skb: step over the header digest */
+       if (conn->hdrdgst_en && lskb == skb)
+               offset += ISCSI_DIGEST_SIZE;
+
+       offloaded = !!cxgbi_skcb_test_flag(lskb, SKCBF_RX_DATA_DDPD);
+
+       if (opcode == ISCSI_OP_SCSI_DATA_IN)
+               log_debug(1 << CXGBI_DBG_PDU_RX,
+                       "skb 0x%p, op 0x%x, itt 0x%x, %u %s ddp'ed.\n",
+                       skb, opcode, ntohl(tcp_conn->in.hdr->itt),
+                       tcp_conn->in.datalen, offloaded ? "is" : "not");
+
+       return read_pdu_skb(conn, skb, offset, offloaded);
+}
+
+/*
+ * csk_return_rx_credits - return consumed receive window to the device.
+ *
+ * The outstanding credits are copied_seq - rcv_wup.  Nothing is sent
+ * unless the socket is established, some credits are outstanding and the
+ * device has a non-zero rx_credit_thres.  An update is issued when the
+ * credits reach that threshold or come within 16384 bytes of the device
+ * receive window; rcv_wup then advances by what csk_send_rx_credits()
+ * returns.  The @copied argument is not used.
+ */
+static void csk_return_rx_credits(struct cxgbi_sock *csk, int copied)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       u32 credits;
+
+       log_debug(1 << CXGBI_DBG_PDU_RX,
+               "csk 0x%p,%u,0x%lx,%u, seq %u, wup %u, thre %u, %u.\n",
+               csk, csk->state, csk->flags, csk->tid, csk->copied_seq,
+               csk->rcv_wup, cdev->rx_credit_thres,
+               cdev->rcv_win);
+
+       if (csk->state != CTP_ESTABLISHED)
+               return;
+
+       credits = csk->copied_seq - csk->rcv_wup;
+       if (unlikely(!credits || !cdev->rx_credit_thres))
+               return;
+
+       if (credits + 16384 >= cdev->rcv_win ||
+           credits >= cdev->rx_credit_thres)
+               csk->rcv_wup += cdev->csk_send_rx_credits(csk, credits);
+}
+/*
+ * cxgbi_conn_pdu_ready - process the received PDUs queued on a socket.
+ * @csk: offloaded socket whose receive_queue is drained
+ *
+ * While the previous read returned 0, the skb at the head of
+ * csk->receive_queue is taken off the queue if it is flagged
+ * SKCBF_RX_STATUS, its header and data are passed to skb_read_pdu_bhs()
+ * and skb_read_pdu_data(), and it is freed.  The PDU lengths consumed are
+ * added to csk->copied_seq and conn->rxdata_octets, and rx credits are
+ * returned via csk_return_rx_credits().  A negative read result fails the
+ * connection with ISCSI_ERR_CONN_FAILED.
+ */
+void cxgbi_conn_pdu_ready(struct cxgbi_sock *csk)
+{
+       struct cxgbi_device *cdev = csk->cdev;
+       struct iscsi_conn *conn = csk->user_data;
+       struct sk_buff *skb;
+       unsigned int read = 0;
+       int err = 0;
+
+       log_debug(1 << CXGBI_DBG_PDU_RX,
+               "csk 0x%p, conn 0x%p.\n", csk, conn);
+       /* no connection attached yet, or rx is suspended: leave the queue */
+       if (unlikely(!conn || conn->suspend_rx)) {
+               log_debug(1 << CXGBI_DBG_PDU_RX,
+                       "csk 0x%p, conn 0x%p, id %d, suspend_rx %lu!\n",
+                       csk, conn, conn ? conn->id : 0xFF,
+                       conn ? conn->suspend_rx : 0xFF);
+               return;
+       }
+       /*
+        * The loop ends when the queue is empty, when the head skb is not
+        * flagged SKCBF_RX_STATUS, or when a read leaves err non-zero.
+        */
+       while (!err) {
+               skb = skb_peek(&csk->receive_queue);
+               if (!skb ||
+                   !(cxgbi_skcb_test_flag(skb, SKCBF_RX_STATUS))) {
+                       if (skb)
+                               log_debug(1 << CXGBI_DBG_PDU_RX,
+                                       "skb 0x%p, NOT ready 0x%lx.\n",
+                                       skb, cxgbi_skcb_flags(skb));
+                       break;
+               }
+               __skb_unlink(skb, &csk->receive_queue);
+               /* counted even if the reads below fail */
+               read += cxgbi_skcb_rx_pdulen(skb);
+               log_debug(1 << CXGBI_DBG_PDU_RX,
+                       "csk 0x%p, skb 0x%p,%u,f 0x%lx, pdu len %u.\n",
+                       csk, skb, skb->len, cxgbi_skcb_flags(skb),
+                       cxgbi_skcb_rx_pdulen(skb));
+               /* coalesced: header and data are both read from this skb */
+               if (cxgbi_skcb_test_flag(skb, SKCBF_RX_COALESCED)) {
+                       err = skb_read_pdu_bhs(conn, skb);
+                       if (err < 0) {
+                               pr_err("coalesced bhs, csk 0x%p, skb 0x%p,%u, "
+                                       "f 0x%lx, plen %u.\n",
+                                       csk, skb, skb->len,
+                                       cxgbi_skcb_flags(skb),
+                                       cxgbi_skcb_rx_pdulen(skb));
+                               goto skb_done;
+                       }
+                       err = skb_read_pdu_data(conn, skb, skb,
+                                               err + cdev->skb_rx_extra); /* data offset: header read result plus skb_rx_extra */
+                       if (err < 0)
+                               pr_err("coalesced data, csk 0x%p, skb 0x%p,%u, "
+                                       "f 0x%lx, plen %u.\n",
+                                       csk, skb, skb->len,
+                                       cxgbi_skcb_flags(skb),
+                                       cxgbi_skcb_rx_pdulen(skb));
+               } else {
+                       err = skb_read_pdu_bhs(conn, skb);
+                       if (err < 0) {
+                               pr_err("bhs, csk 0x%p, skb 0x%p,%u, "
+                                       "f 0x%lx, plen %u.\n",
+                                       csk, skb, skb->len,
+                                       cxgbi_skcb_flags(skb),
+                                       cxgbi_skcb_rx_pdulen(skb));
+                               goto skb_done;
+                       }
+                       /* SKCBF_RX_DATA: the payload is in the next queued skb */
+                       if (cxgbi_skcb_test_flag(skb, SKCBF_RX_DATA)) {
+                               struct sk_buff *dskb;
+
+                               dskb = skb_peek(&csk->receive_queue);
+                               if (!dskb) {
+                                       pr_err("csk 0x%p, skb 0x%p,%u, f 0x%lx,"
+                                               " plen %u, NO data.\n",
+                                               csk, skb, skb->len,
+                                               cxgbi_skcb_flags(skb),
+                                               cxgbi_skcb_rx_pdulen(skb));
+                                       err = -EIO;
+                                       goto skb_done;
+                               }
+                               __skb_unlink(dskb, &csk->receive_queue);
+                               /* flags are taken from the header skb, bytes from dskb */
+                               err = skb_read_pdu_data(conn, skb, dskb, 0);
+                               if (err < 0)
+                                       pr_err("data, csk 0x%p, skb 0x%p,%u, "
+                                               "f 0x%lx, plen %u, dskb 0x%p,"
+                                               "%u.\n",
+                                               csk, skb, skb->len,
+                                               cxgbi_skcb_flags(skb),
+                                               cxgbi_skcb_rx_pdulen(skb),
+                                               dskb, dskb->len);
+                               __kfree_skb(dskb);
+                       } else
+                               err = skb_read_pdu_data(conn, skb, skb, 0);
+               }
+skb_done:
+               __kfree_skb(skb); /* the header skb is freed on success and on error */
+
+               if (err < 0)
+                       break;
+       }
+       /* account what was consumed, even if the last read failed */
+       log_debug(1 << CXGBI_DBG_PDU_RX, "csk 0x%p, read %u.\n", csk, read);
+       if (read) {
+               csk->copied_seq += read;
+               csk_return_rx_credits(csk, read);
+               conn->rxdata_octets += read;
+       }
+       /* any negative read result takes the connection down */
+       if (err < 0) {
+               pr_info("csk 0x%p, 0x%p, rx failed %d, read %u.\n",
+                       csk, conn, err, read);
+               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_conn_pdu_ready);
+
+/*
+ * sgl_seek_offset - locate the scatterlist entry containing a byte offset.
+ * @sgl:    first entry of the scatterlist
+ * @sgcnt:  number of entries to examine
+ * @offset: byte offset from the start of the list
+ * @off:    out: offset remaining inside the entry that was found
+ * @sgp:    out: the entry that was found
+ *
+ * Returns 0 on success, -EFAULT if @offset lies beyond the examined entries
+ * (in which case @off and @sgp are left untouched).
+ */
+static int sgl_seek_offset(struct scatterlist *sgl, unsigned int sgcnt,
+                               unsigned int offset, unsigned int *off,
+                               struct scatterlist **sgp)
+{
+       struct scatterlist *cur;
+       unsigned int remaining = offset;
+       int idx;
+
+       for_each_sg(sgl, cur, sgcnt, idx) {
+               if (remaining >= cur->length) {
+                       /* offset lies past this entry: skip over it */
+                       remaining -= cur->length;
+                       continue;
+               }
+               *off = remaining;
+               *sgp = cur;
+               return 0;
+       }
+       return -EFAULT;
+}
+
+/*
+ * sgl_read_to_frags - describe a byte range of a scatterlist as page frags.
+ * @sg:       entry the range starts in
+ * @sgoffset: offset of the range inside @sg
+ * @dlen:     length of the range in bytes
+ * @frags:    out: array receiving the page/offset/size triples
+ * @frag_max: capacity of @frags
+ *
+ * A chunk that continues exactly where the previous frag ended on the same
+ * page is merged into that frag.  Returns the number of frags filled in,
+ * or -EINVAL if the scatterlist ends early or @frag_max is exceeded.
+ */
+static int sgl_read_to_frags(struct scatterlist *sg, unsigned int sgoffset,
+                               unsigned int dlen, struct page_frag *frags,
+                               int frag_max)
+{
+       unsigned int left = dlen;
+       unsigned int avail = sg->length - sgoffset;
+       struct page *page = sg_page(sg);
+       int nr = 0;
+
+       for (;;) {
+               struct page_frag *prev = nr ? &frags[nr - 1] : NULL;
+               unsigned int chunk;
+
+               if (!avail) {
+                       /* current entry exhausted: move on to the next one */
+                       sg = sg_next(sg);
+                       if (!sg) {
+                               pr_warn("sg %d NULL, len %u/%u.\n",
+                                       nr, left, dlen);
+                               return -EINVAL;
+                       }
+                       sgoffset = 0;
+                       avail = sg->length;
+                       page = sg_page(sg);
+               }
+
+               chunk = min(left, avail);
+               if (prev && page == prev->page &&
+                   sgoffset + sg->offset == prev->offset + prev->size) {
+                       /* contiguous with the previous frag: extend it */
+                       prev->size += chunk;
+               } else if (nr >= frag_max) {
+                       pr_warn("too many pages %u, dlen %u.\n",
+                               frag_max, dlen);
+                       return -EINVAL;
+               } else {
+                       frags[nr].page = page;
+                       frags[nr].offset = sg->offset + sgoffset;
+                       frags[nr].size = chunk;
+                       nr++;
+               }
+
+               left -= chunk;
+               sgoffset += chunk;
+               avail -= chunk;
+               if (!left)
+                       break;
+       }
+
+       return nr;
+}
+
+/*
+ * cxgbi_conn_alloc_pdu - allocate the tx skb for an iSCSI task.
+ * @task:   task the PDU belongs to
+ * @opcode: iSCSI opcode of the PDU
+ *
+ * The skb gets cdev->skb_tx_rsvd bytes reserved in front plus room for the
+ * largest PDU header.  For Data-Out PDUs and for write or bidirectional
+ * SCSI commands, extra room for payload is added when the largest skb head
+ * exceeds 512 * MAX_SKB_FRAGS.  task->hdr is pointed at the skb data and,
+ * except for Data-Out, an ITT is reserved.
+ *
+ * Returns 0, or -ENOMEM (after bumping the port's tx_dropped counter) when
+ * the skb cannot be allocated.
+ */
+int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
+{
+       struct iscsi_conn *conn = task->conn;
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct cxgbi_device *cdev = cconn->chba->cdev;
+       struct iscsi_tcp_task *tcp_task = task->dd_data;
+       struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
+       struct scsi_cmnd *sc = task->sc;
+       int headroom = SKB_TX_ISCSI_PDU_HEADER_MAX;
+       bool data_in_head = false;
+       struct sk_buff *skb;
+
+       tcp_task->dd_data = tdata;
+       task->hdr = NULL;
+
+       /* sc is only looked at for SCSI command PDUs */
+       if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS)) {
+               if (opcode == ISCSI_OP_SCSI_DATA_OUT)
+                       data_in_head = true;
+               else if (opcode == ISCSI_OP_SCSI_CMD)
+                       data_in_head = scsi_bidi_cmnd(sc) ||
+                               sc->sc_data_direction == DMA_TO_DEVICE;
+       }
+
+       /* payload may be copied into the skb head, so make room for it */
+       if (data_in_head)
+               headroom += min_t(unsigned int,
+                               SKB_MAX_HEAD(cdev->skb_tx_rsvd),
+                               conn->max_xmit_dlength);
+
+       skb = alloc_skb(cdev->skb_tx_rsvd + headroom, GFP_ATOMIC);
+       tdata->skb = skb;
+       if (!skb) {
+               struct cxgbi_sock *csk = cconn->cep->csk;
+               struct net_device *ndev = cdev->ports[csk->port_id];
+
+               ndev->stats.tx_dropped++;
+               return -ENOMEM;
+       }
+
+       skb_reserve(skb, cdev->skb_tx_rsvd);
+       task->hdr = (struct iscsi_hdr *)skb->data;
+       task->hdr_max = SKB_TX_ISCSI_PDU_HEADER_MAX; /* BHS + AHS */
+
+       /* data_out uses scsi_cmd's itt */
+       if (opcode != ISCSI_OP_SCSI_DATA_OUT)
+               task_reserve_itt(task, &task->hdr->itt);
+
+       log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
+               "task 0x%p, op 0x%x, skb 0x%p,%u+%u/%u, itt 0x%x.\n",
+               task, opcode, skb, cdev->skb_tx_rsvd, headroom,
+               conn->max_xmit_dlength, ntohl(task->hdr->itt));
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxgbi_conn_alloc_pdu);
+
+/*
+ * tx_skb_setmode - record the ULP mode of a tx skb in its control block.
+ *
+ * Bit 0 of the submode is set for header digest, bit 1 for data digest.
+ * With neither digest requested the ULP mode is cleared to 0.
+ */
+static inline void tx_skb_setmode(struct sk_buff *skb, int hcrc, int dcrc)
+{
+       u8 submode = (hcrc ? 1 : 0) | (dcrc ? 2 : 0);
+
+       if (!submode) {
+               cxgbi_skcb_ulp_mode(skb) = 0;
+               return;
+       }
+
+       cxgbi_skcb_ulp_mode(skb) = (ULP2_MODE_ISCSI << 4) | submode;
+}
+
+/*
+ * cxgbi_conn_init_pdu - attach the payload of an iSCSI PDU to its tx skb.
+ * @task:   task whose tx skb is held in its cxgbi task data
+ * @offset: byte offset of this PDU's data in the command's data-out buffer
+ *          (only used when the task carries a SCSI command)
+ * @count:  number of payload bytes in this PDU
+ *
+ * The header (task->hdr_len bytes) is accounted first and the ULP mode is
+ * set from the connection's digest settings.  For a SCSI command the
+ * payload is described as page frags taken from the data-out scatterlist;
+ * if there are more frags than the skb can hold (or no slot would be left
+ * for the pad frag) the payload and its padding are copied into the skb
+ * head instead.  For a non-SCSI task the payload is the page behind
+ * task->data.  When padding is still needed afterwards it is appended as
+ * one more page frag pointing at the shared padding buffer.
+ *
+ * Returns 0 on success or the negative errno from the scatterlist helpers.
+ */
+int cxgbi_conn_init_pdu(struct iscsi_task *task, unsigned int offset,
+                             unsigned int count)
+{
+       struct iscsi_conn *conn = task->conn;
+       struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
+       struct sk_buff *skb = tdata->skb;
+       unsigned int datalen = count;
+       int i, padlen = iscsi_padding(count);
+       struct page *pg;
+
+       log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
+               "task 0x%p,0x%p, skb 0x%p, 0x%x,0x%x,0x%x, %u+%u.\n",
+               task, task->sc, skb, (*skb->data) & ISCSI_OPCODE_MASK,
+               ntohl(task->cmdsn), ntohl(task->hdr->itt), offset, count);
+
+       skb_put(skb, task->hdr_len);
+       tx_skb_setmode(skb, conn->hdrdgst_en, datalen ? conn->datadgst_en : 0);
+       if (!count)
+               return 0;
+
+       if (task->sc) {
+               struct scsi_data_buffer *sdb = scsi_out(task->sc);
+               struct scatterlist *sg = NULL;
+               int err;
+
+               tdata->offset = offset;
+               tdata->count = count;
+               err = sgl_seek_offset(
+                                       sdb->table.sgl, sdb->table.nents,
+                                       tdata->offset, &tdata->sgoffset, &sg);
+               if (err < 0) {
+                       pr_warn("tpdu, sgl %u, bad offset %u/%u.\n",
+                               sdb->table.nents, tdata->offset, sdb->length);
+                       return err;
+               }
+               err = sgl_read_to_frags(sg, tdata->sgoffset, tdata->count,
+                                       tdata->frags, MAX_PDU_FRAGS);
+               if (err < 0) {
+                       pr_warn("tpdu, sgl %u, bad offset %u + %u.\n",
+                               sdb->table.nents, tdata->offset, tdata->count);
+                       return err;
+               }
+               tdata->nr_frags = err;
+
+               if (tdata->nr_frags > MAX_SKB_FRAGS ||
+                   (padlen && tdata->nr_frags == MAX_SKB_FRAGS)) {
+                       char *dst = skb->data + task->hdr_len;
+                       struct page_frag *frag = tdata->frags;
+
+                       /* too many frags: copy the data into the skb head */
+                       for (i = 0; i < tdata->nr_frags; i++, frag++) {
+                               char *src = kmap_atomic(frag->page);
+
+                               memcpy(dst, src+frag->offset, frag->size);
+                               dst += frag->size;
+                               kunmap_atomic(src);
+                       }
+                       if (padlen)
+                               memset(dst, 0, padlen);
+                       /*
+                        * The pad bytes zeroed above are part of the PDU, so
+                        * they must be counted in the skb length.  Only clear
+                        * padlen afterwards, which stops the pad frag below
+                        * from being added a second time.
+                        */
+                       skb_put(skb, count + padlen);
+                       padlen = 0;
+               } else {
+                       /* reference the data pages directly as skb page frags */
+                       for (i = 0; i < tdata->nr_frags; i++) {
+                               __skb_fill_page_desc(skb, i,
+                                               tdata->frags[i].page,
+                                               tdata->frags[i].offset,
+                                               tdata->frags[i].size);
+                               skb_frag_ref(skb, i);
+                       }
+                       skb_shinfo(skb)->nr_frags = tdata->nr_frags;
+                       skb->len += count;
+                       skb->data_len += count;
+                       skb->truesize += count;
+               }
+
+       } else {
+               /* non-SCSI task: payload is the buffer behind task->data */
+               pg = virt_to_page(task->data);
+
+               get_page(pg);
+               skb_fill_page_desc(skb, 0, pg, offset_in_page(task->data),
+                                       count);
+               skb->len += count;
+               skb->data_len += count;
+               skb->truesize += count;
+       }
+
+       if (padlen) {
+               /* append the pad bytes as one more page frag */
+               i = skb_shinfo(skb)->nr_frags;
+               skb_fill_page_desc(skb, i,
+                               virt_to_page(padding), offset_in_page(padding),
+                               padlen);
+
+               skb->data_len += padlen;
+               skb->truesize += padlen;
+               skb->len += padlen;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxgbi_conn_init_pdu);
+
+/*
+ * cxgbi_conn_xmit_pdu - hand a task's prepared skb to the offloaded socket.
+ * @task: task whose tx skb is to be sent
+ *
+ * Returns 0 when there is nothing to send or the PDU was accepted (the
+ * connection's txdata_octets is then advanced by the sent length plus any
+ * enabled digests).  On -EAGAIN/-ENOBUFS the skb is kept on the task for a
+ * later retry and the error is returned.  Any other result frees the skb,
+ * fails the connection with ISCSI_ERR_XMIT_FAILED and is returned as is.
+ */
+int cxgbi_conn_xmit_pdu(struct iscsi_task *task)
+{
+       struct iscsi_tcp_conn *tcp_conn = task->conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
+       struct sk_buff *skb = tdata->skb;
+       unsigned int datalen;
+       unsigned int len;
+       int err;
+
+       if (!skb) {
+               log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
+                       "task 0x%p, skb NULL.\n", task);
+               return 0;
+       }
+
+       /*
+        * Snapshot the lengths before sending.  Once the send succeeds the
+        * skb has been handed to the socket and is not dereferenced here
+        * again; only its address is logged.
+        */
+       len = skb->len;
+       datalen = skb->data_len;
+       tdata->skb = NULL;
+       err = cxgbi_sock_send_pdus(cconn->cep->csk, skb);
+       if (err > 0) {
+               int pdulen = err;
+
+               log_debug(1 << CXGBI_DBG_PDU_TX,
+                       "task 0x%p,0x%p, skb 0x%p, len %u/%u, rv %d.\n",
+                       task, task->sc, skb, len, datalen, err);
+
+               if (task->conn->hdrdgst_en)
+                       pdulen += ISCSI_DIGEST_SIZE;
+
+               if (datalen && task->conn->datadgst_en)
+                       pdulen += ISCSI_DIGEST_SIZE;
+
+               task->conn->txdata_octets += pdulen;
+               return 0;
+       }
+
+       if (err == -EAGAIN || err == -ENOBUFS) {
+               log_debug(1 << CXGBI_DBG_PDU_TX,
+                       "task 0x%p, skb 0x%p, len %u/%u, %d EAGAIN.\n",
+                       task, skb, skb->len, skb->data_len, err);
+               /* reset skb to send when we are called again */
+               tdata->skb = skb;
+               return err;
+       }
+
+       log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
+               "itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n",
+               task->itt, skb, skb->len, skb->data_len, err);
+
+       /* hard failure: the skb was not taken, so drop it here */
+       kfree_skb(skb);
+
+       iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err);
+       iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED);
+       return err;
+}
+EXPORT_SYMBOL_GPL(cxgbi_conn_xmit_pdu);
+
+/*
+ * cxgbi_cleanup_task - release the per-task transmit state.
+ *
+ * Frees an skb that is still attached to the task, clears the cxgbi task
+ * data, releases the task's ITT and then runs the generic iscsi_tcp
+ * cleanup.
+ */
+void cxgbi_cleanup_task(struct iscsi_task *task)
+{
+       struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
+       struct sk_buff *unsent = tdata->skb;
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "task 0x%p, skb 0x%p, itt 0x%x.\n",
+               task, unsent, task->hdr_itt);
+
+       /* the skb is still here if the xmit callout never consumed it */
+       if (unsent)
+               __kfree_skb(unsent);
+       memset(tdata, 0, sizeof(*tdata));
+
+       task_release_itt(task, task->hdr_itt);
+       iscsi_tcp_cleanup_task(task);
+}
+EXPORT_SYMBOL_GPL(cxgbi_cleanup_task);
+
+/*
+ * cxgbi_get_conn_stats - copy the connection counters into @stats.
+ *
+ * Digest and timeout error counts are always reported as 0.  A single
+ * custom entry named "eh_abort_cnt" carries conn->eh_abort_cnt.
+ */
+void cxgbi_get_conn_stats(struct iscsi_cls_conn *cls_conn,
+                               struct iscsi_stats *stats)
+{
+       struct iscsi_conn *conn = cls_conn->dd_data;
+
+       /* byte counters */
+       stats->txdata_octets = conn->txdata_octets;
+       stats->rxdata_octets = conn->rxdata_octets;
+
+       /* per-PDU-type counters */
+       stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
+       stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
+       stats->dataout_pdus = conn->dataout_pdus_cnt;
+       stats->datain_pdus = conn->datain_pdus_cnt;
+       stats->r2t_pdus = conn->r2t_pdus_cnt;
+       stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
+       stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
+
+       /* error counters are always reported as zero here */
+       stats->digest_err = 0;
+       stats->timeout_err = 0;
+
+       /* one custom statistic */
+       stats->custom_length = 1;
+       strcpy(stats->custom[0].desc, "eh_abort_cnt");
+       stats->custom[0].value = conn->eh_abort_cnt;
+}
+EXPORT_SYMBOL_GPL(cxgbi_get_conn_stats);
+
+/*
+ * cxgbi_conn_max_xmit_dlength - clamp the connection's transmit data length.
+ *
+ * The limit is the larger of 512 * MAX_SKB_FRAGS and the largest skb head,
+ * capped by the device's tx_max_size.  An unset (zero) or larger
+ * max_xmit_dlength is replaced by that limit, and the result is passed to
+ * cxgbi_align_pdu_size().  Always returns 0.
+ */
+static int cxgbi_conn_max_xmit_dlength(struct iscsi_conn *conn)
+{
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct cxgbi_device *cdev = cconn->chba->cdev;
+       unsigned int head_max = SKB_MAX_HEAD(cdev->skb_tx_rsvd);
+       unsigned int frag_max = 512 * MAX_SKB_FRAGS;
+       unsigned int limit;
+
+       limit = max(frag_max, head_max);
+       limit = min(cdev->tx_max_size, limit);
+
+       if (!conn->max_xmit_dlength || conn->max_xmit_dlength > limit)
+               conn->max_xmit_dlength = limit;
+       cxgbi_align_pdu_size(conn->max_xmit_dlength);
+
+       return 0;
+}
+
+/*
+ * cxgbi_conn_max_recv_dlength - validate the connection's receive data length.
+ *
+ * The limit is the device's rx_max_size after cxgbi_align_pdu_size().  An
+ * unset (zero) max_recv_dlength takes the limit; a value above the limit
+ * is rejected with -EINVAL; any other value is passed to
+ * cxgbi_align_pdu_size().  Returns 0 on success.
+ */
+static int cxgbi_conn_max_recv_dlength(struct iscsi_conn *conn)
+{
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       unsigned int limit = cconn->chba->cdev->rx_max_size;
+
+       cxgbi_align_pdu_size(limit);
+
+       if (!conn->max_recv_dlength) {
+               conn->max_recv_dlength = limit;
+               return 0;
+       }
+
+       if (conn->max_recv_dlength > limit) {
+               pr_err("MaxRecvDataSegmentLength %u > %u.\n",
+                       conn->max_recv_dlength, limit);
+               return -EINVAL;
+       }
+
+       /* already within the limit, only the alignment is left to apply */
+       cxgbi_align_pdu_size(conn->max_recv_dlength);
+       return 0;
+}
+
+/*
+ * cxgbi_set_conn_param - set an iSCSI connection parameter.
+ *
+ * Most parameters go straight to iscsi_set_param().  Enabling a header or
+ * data digest additionally programs the digest settings through the
+ * device's csk_ddp_setup_digest() hook, the two data-length parameters are
+ * re-validated against the device limits, and MaxR2T is handled by
+ * iscsi_tcp_set_max_r2t().  Returns 0 or the error of the step that failed.
+ */
+int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn,
+                       enum iscsi_param param, char *buf, int buflen)
+{
+       struct iscsi_conn *conn = cls_conn->dd_data;
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct cxgbi_sock *csk = cconn->cep->csk;
+       int digest_on;
+       int err;
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "cls_conn 0x%p, param %d, buf(%d) %s.\n",
+               cls_conn, param, buflen, buf);
+
+       switch (param) {
+       case ISCSI_PARAM_HDRDGST_EN:
+       case ISCSI_PARAM_DATADGST_EN:
+               err = iscsi_set_param(cls_conn, param, buf, buflen);
+               if (err)
+                       break;
+               /* only the digest that was just set decides about the setup */
+               digest_on = (param == ISCSI_PARAM_HDRDGST_EN) ?
+                               conn->hdrdgst_en : conn->datadgst_en;
+               if (digest_on)
+                       err = csk->cdev->csk_ddp_setup_digest(csk, csk->tid,
+                                                       conn->hdrdgst_en,
+                                                       conn->datadgst_en, 0);
+               break;
+       case ISCSI_PARAM_MAX_RECV_DLENGTH:
+       case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+               err = iscsi_set_param(cls_conn, param, buf, buflen);
+               if (err)
+                       break;
+               if (param == ISCSI_PARAM_MAX_RECV_DLENGTH)
+                       err = cxgbi_conn_max_recv_dlength(conn);
+               else
+                       err = cxgbi_conn_max_xmit_dlength(conn);
+               break;
+       case ISCSI_PARAM_MAX_R2T:
+               err = iscsi_tcp_set_max_r2t(conn, buf);
+               break;
+       default:
+               err = iscsi_set_param(cls_conn, param, buf, buflen);
+               break;
+       }
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(cxgbi_set_conn_param);
+
+/*
+ * csk_print_port - format the socket's destination port into @buf.
+ *
+ * Writes the port in host byte order followed by a newline, holding a
+ * socket reference meanwhile.  Returns the number of characters written.
+ */
+static inline int csk_print_port(struct cxgbi_sock *csk, char *buf)
+{
+       int written;
+
+       cxgbi_sock_get(csk);
+       written = sprintf(buf, "%hu\n", ntohs(csk->daddr.sin_port));
+       cxgbi_sock_put(csk);
+
+       return written;
+}
+
+/*
+ * csk_print_ip - format the socket's destination address into @buf.
+ *
+ * An AF_INET socket is printed as IPv4, anything else as IPv6.  A socket
+ * reference is held meanwhile.  Returns the number of characters written.
+ */
+static inline int csk_print_ip(struct cxgbi_sock *csk, char *buf)
+{
+       int written;
+
+       cxgbi_sock_get(csk);
+       if (csk->csk_family != AF_INET)
+               written = sprintf(buf, "%pI6",
+                                 &csk->daddr6.sin6_addr);
+       else
+               written = sprintf(buf, "%pI4",
+                                 &csk->daddr.sin_addr.s_addr);
+       cxgbi_sock_put(csk);
+
+       return written;
+}
+
+/*
+ * cxgbi_get_ep_param - report an endpoint's remote address or port.
+ * @ep:    iSCSI endpoint
+ * @param: ISCSI_PARAM_CONN_PORT or ISCSI_PARAM_CONN_ADDRESS
+ * @buf:   output buffer, filled by iscsi_conn_get_addr_param()
+ *
+ * Returns the result of iscsi_conn_get_addr_param() for the socket's
+ * destination address, -ENOTCONN if the endpoint has no cxgbi endpoint
+ * data or no socket attached, and -ENOSYS for any other parameter.
+ */
+int cxgbi_get_ep_param(struct iscsi_endpoint *ep, enum iscsi_param param,
+                      char *buf)
+{
+       struct cxgbi_endpoint *cep = ep->dd_data;
+       struct cxgbi_sock *csk;
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "cls_conn 0x%p, param %d.\n", ep, param);
+
+       switch (param) {
+       case ISCSI_PARAM_CONN_PORT:
+       case ISCSI_PARAM_CONN_ADDRESS:
+               if (!cep)
+                       return -ENOTCONN;
+
+               csk = cep->csk;
+               if (!csk)
+                       return -ENOTCONN;
+
+               return iscsi_conn_get_addr_param((struct sockaddr_storage *)
+                                                &csk->daddr, param, buf);
+       default:
+               return -ENOSYS;
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_get_ep_param);
+
+/*
+ * cxgbi_create_conn - create an iSCSI connection with cxgbi private data.
+ *
+ * Sets up an iscsi_tcp connection with room for a struct cxgbi_conn and
+ * points that structure back at the iscsi_conn.  Returns the new class
+ * connection, or NULL if the setup failed.
+ */
+struct iscsi_cls_conn *
+cxgbi_create_conn(struct iscsi_cls_session *cls_session, u32 cid)
+{
+       struct iscsi_cls_conn *cls_conn;
+       struct iscsi_tcp_conn *tcp_conn;
+       struct iscsi_conn *conn;
+       struct cxgbi_conn *cconn;
+
+       cls_conn = iscsi_tcp_conn_setup(cls_session,
+                                       sizeof(struct cxgbi_conn), cid);
+       if (cls_conn == NULL)
+               return NULL;
+
+       /* walk down the private-data chain to the cxgbi connection */
+       conn = cls_conn->dd_data;
+       tcp_conn = conn->dd_data;
+       cconn = tcp_conn->dd_data;
+
+       cconn->iconn = conn;
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "cid %u(0x%x), cls 0x%p,0x%p, conn 0x%p,0x%p,0x%p.\n",
+               cid, cid, cls_session, cls_conn, conn, tcp_conn, cconn);
+
+       return cls_conn;
+}
+EXPORT_SYMBOL_GPL(cxgbi_create_conn);
+
+/*
+ * cxgbi_bind_conn - bind an iSCSI connection to an offloaded endpoint.
+ * @cls_session:   session the connection belongs to
+ * @cls_conn:      connection being bound
+ * @transport_eph: handle used to look up the endpoint
+ * @is_leading:    passed through to iscsi_conn_bind()
+ *
+ * Programs the DDP page index for the endpoint's socket, binds the
+ * connection, sizes the task tag index from the session's cmds_max, links
+ * socket, endpoint and connection together under the socket's callback
+ * lock, applies the device data-length limits and primes the receive
+ * engine for a header.
+ *
+ * Returns 0 on success, -EINVAL if the endpoint lookup or the bind fails,
+ * or the negative result of the page index setup.
+ */
+int cxgbi_bind_conn(struct iscsi_cls_session *cls_session,
+                               struct iscsi_cls_conn *cls_conn,
+                               u64 transport_eph, int is_leading)
+{
+       struct iscsi_conn *conn = cls_conn->dd_data;
+       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct cxgbi_conn *cconn = tcp_conn->dd_data;
+       struct iscsi_endpoint *ep = iscsi_lookup_endpoint(transport_eph);
+       struct cxgbi_endpoint *cep;
+       struct cxgbi_sock *csk;
+       int rc;
+
+       if (ep == NULL)
+               return -EINVAL;
+
+       cep = ep->dd_data;
+       csk = cep->csk;
+
+       /*  setup ddp pagesize */
+       rc = csk->cdev->csk_ddp_setup_pgidx(csk, csk->tid, page_idx, 0);
+       if (rc < 0)
+               return rc;
+
+       if (iscsi_conn_bind(cls_session, cls_conn, is_leading) != 0)
+               return -EINVAL;
+
+       /*  calculate the tag idx bits needed for this conn based on cmds_max */
+       cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
+
+       /* cross-link socket, endpoint and connection under the callback lock */
+       write_lock_bh(&csk->callback_lock);
+       csk->user_data = conn;
+       cconn->chba = cep->chba;
+       cconn->cep = cep;
+       cep->cconn = cconn;
+       write_unlock_bh(&csk->callback_lock);
+
+       /* the results of both limit helpers are not checked here */
+       cxgbi_conn_max_xmit_dlength(conn);
+       cxgbi_conn_max_recv_dlength(conn);
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "cls 0x%p,0x%p, ep 0x%p, cconn 0x%p, csk 0x%p.\n",
+               cls_session, cls_conn, ep, cconn, csk);
+
+       /*  init recv engine */
+       iscsi_tcp_hdr_recv_prep(tcp_conn);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxgbi_bind_conn);
+
+/*
+ * cxgbi_create_session - create an iSCSI session on the endpoint's host.
+ * @ep:            endpoint the session is created for (must not be NULL)
+ * @cmds_max:      passed to iscsi_session_setup()
+ * @qdepth:        not used here
+ * @initial_cmdsn: passed to iscsi_session_setup()
+ *
+ * The per-task private area holds a struct iscsi_tcp_task followed by a
+ * struct cxgbi_task_data.  Returns the new class session, or NULL if the
+ * endpoint is missing, the session setup fails or the R2T pool cannot be
+ * allocated (the session is torn down again in that last case).
+ */
+struct iscsi_cls_session *cxgbi_create_session(struct iscsi_endpoint *ep,
+                                               u16 cmds_max, u16 qdepth,
+                                               u32 initial_cmdsn)
+{
+       struct iscsi_cls_session *cls_session;
+       struct cxgbi_endpoint *cep;
+       struct cxgbi_hba *chba;
+       struct Scsi_Host *shost;
+
+       if (!ep) {
+               pr_err("missing endpoint.\n");
+               return NULL;
+       }
+
+       cep = ep->dd_data;
+       chba = cep->chba;
+       shost = chba->shost;
+
+       BUG_ON(chba != iscsi_host_priv(shost));
+
+       cls_session = iscsi_session_setup(chba->cdev->itp, shost,
+                                       cmds_max, 0,
+                                       sizeof(struct iscsi_tcp_task) +
+                                       sizeof(struct cxgbi_task_data),
+                                       initial_cmdsn, ISCSI_MAX_TARGET);
+       if (!cls_session)
+               return NULL;
+
+       if (iscsi_tcp_r2tpool_alloc(cls_session->dd_data)) {
+               /* no R2T pool: undo the session setup */
+               iscsi_session_teardown(cls_session);
+               return NULL;
+       }
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "ep 0x%p, cls sess 0x%p.\n", ep, cls_session);
+       return cls_session;
+}
+EXPORT_SYMBOL_GPL(cxgbi_create_session);
+
+/*
+ * cxgbi_destroy_session - free the R2T pool and tear the session down.
+ */
+void cxgbi_destroy_session(struct iscsi_cls_session *cls_session)
+{
+       struct iscsi_session *session = cls_session->dd_data;
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "cls sess 0x%p.\n", cls_session);
+
+       iscsi_tcp_r2tpool_free(session);
+       iscsi_session_teardown(cls_session);
+}
+EXPORT_SYMBOL_GPL(cxgbi_destroy_session);
+
+/*
+ * cxgbi_set_host_param - set an iSCSI host parameter.
+ * @shost:  SCSI host whose private data is the cxgbi hba
+ * @param:  parameter to set
+ * @buf:    new value as text
+ * @buflen: length of @buf
+ *
+ * An IP address is parsed as IPv4 and stored through
+ * cxgbi_set_iscsi_ipv4().  The hardware address and netdev name are
+ * accepted but ignored.  Everything else goes to iscsi_host_set_param().
+ *
+ * Returns 0 on success, -ENODEV if the hba has no net device, or the
+ * result of iscsi_host_set_param().
+ */
+int cxgbi_set_host_param(struct Scsi_Host *shost, enum iscsi_host_param param,
+                       char *buf, int buflen)
+{
+       struct cxgbi_hba *chba = iscsi_host_priv(shost);
+
+       if (!chba->ndev) {
+               /* this is the setter, so report a failed set, not a get */
+               shost_printk(KERN_ERR, shost, "Could not set host param. "
+                               "netdev for host not set.\n");
+               return -ENODEV;
+       }
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "shost 0x%p, hba 0x%p,%s, param %d, buf(%d) %s.\n",
+               shost, chba, chba->ndev->name, param, buflen, buf);
+
+       switch (param) {
+       case ISCSI_HOST_PARAM_IPADDRESS:
+       {
+               __be32 addr = in_aton(buf);
+               log_debug(1 << CXGBI_DBG_ISCSI,
+                       "hba %s, req. ipv4 %pI4.\n", chba->ndev->name, &addr);
+               cxgbi_set_iscsi_ipv4(chba, addr);
+               return 0;
+       }
+       case ISCSI_HOST_PARAM_HWADDRESS:
+       case ISCSI_HOST_PARAM_NETDEV_NAME:
+               /* accepted, but nothing is changed for these */
+               return 0;
+       default:
+               return iscsi_host_set_param(shost, param, buf, buflen);
+       }
+}
+EXPORT_SYMBOL_GPL(cxgbi_set_host_param);
+
+/*
+ * cxgbi_get_host_param - format an iSCSI host parameter into @buf.
+ *
+ * The hardware address and netdev name come from the hba's net device.
+ * The IP address is the source address of a socket found on the hba's
+ * port; if there is none, nothing is written and 0 is returned.  Other
+ * parameters go to iscsi_host_get_param().
+ *
+ * Returns the number of characters written, -ENODEV if the hba has no net
+ * device, or the result of iscsi_host_get_param().
+ */
+int cxgbi_get_host_param(struct Scsi_Host *shost, enum iscsi_host_param param,
+                       char *buf)
+{
+       struct cxgbi_hba *chba = iscsi_host_priv(shost);
+       struct cxgbi_sock *csk;
+       int len = 0;
+
+       if (!chba->ndev) {
+               shost_printk(KERN_ERR, shost, "Could not get host param. "
+                               "netdev for host not set.\n");
+               return -ENODEV;
+       }
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "shost 0x%p, hba 0x%p,%s, param %d.\n",
+               shost, chba, chba->ndev->name, param);
+
+       switch (param) {
+       case ISCSI_HOST_PARAM_HWADDRESS:
+               return sysfs_format_mac(buf, chba->ndev->dev_addr, 6);
+       case ISCSI_HOST_PARAM_NETDEV_NAME:
+               return sprintf(buf, "%s\n", chba->ndev->name);
+       case ISCSI_HOST_PARAM_IPADDRESS:
+               /* handled after the switch */
+               break;
+       default:
+               return iscsi_host_get_param(shost, param, buf);
+       }
+
+       csk = find_sock_on_port(chba->cdev, chba->port_id);
+       if (csk)
+               len = sprintf(buf, "%pIS",
+                             (struct sockaddr *)&csk->saddr);
+       log_debug(1 << CXGBI_DBG_ISCSI,
+                 "hba %s, addr %s.\n", chba->ndev->name, buf);
+
+       return len;
+}
+EXPORT_SYMBOL_GPL(cxgbi_get_host_param);
+
+struct iscsi_endpoint *cxgbi_ep_connect(struct Scsi_Host *shost,
+                                       struct sockaddr *dst_addr,
+                                       int non_blocking)
+{
+       struct iscsi_endpoint *ep;
+       struct cxgbi_endpoint *cep;
+       struct cxgbi_hba *hba = NULL;
+       struct cxgbi_sock *csk;
+       int err = -EINVAL;
+
+       log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
+               "shost 0x%p, non_blocking %d, dst_addr 0x%p.\n",
+               shost, non_blocking, dst_addr);
+
+       if (shost) {
+               hba = iscsi_host_priv(shost);
+               if (!hba) {
+                       pr_info("shost 0x%p, priv NULL.\n", shost);
+                       goto err_out;
+               }
+       }
+
+       if (dst_addr->sa_family == AF_INET) {
+               csk = cxgbi_check_route(dst_addr);
+#if IS_ENABLED(CONFIG_IPV6)
+       } else if (dst_addr->sa_family == AF_INET6) {
+               csk = cxgbi_check_route6(dst_addr);
+#endif
+       } else {
+               pr_info("address family 0x%x NOT supported.\n",
+                       dst_addr->sa_family);
+               err = -EAFNOSUPPORT;
+               return (struct iscsi_endpoint *)ERR_PTR(err);
+       }
+
+       if (IS_ERR(csk))
+               return (struct iscsi_endpoint *)csk;
+       cxgbi_sock_get(csk);
+
+       if (!hba)
+               hba = csk->cdev->hbas[csk->port_id];
+       else if (hba != csk->cdev->hbas[csk->port_id]) {
+               pr_info("Could not connect through requested host %u"
+                       "hba 0x%p != 0x%p (%u).\n",
+                       shost->host_no, hba,
+                       csk->cdev->hbas[csk->port_id], csk->port_id);
+               err = -ENOSPC;
+               goto release_conn;
+       }
+
+       err = sock_get_port(csk);
+       if (err)
+               goto release_conn;
+
+       cxgbi_sock_set_state(csk, CTP_CONNECTING);
+       err = csk->cdev->csk_init_act_open(csk);
+       if (err)
+               goto release_conn;
+
+       if (cxgbi_sock_is_closing(csk)) {
+               err = -ENOSPC;
+               pr_info("csk 0x%p is closing.\n", csk);
+               goto release_conn;
+       }
+
+       ep = iscsi_create_endpoint(sizeof(*cep));
+       if (!ep) {
+               err = -ENOMEM;
+               pr_info("iscsi alloc ep, OOM.\n");
+               goto release_conn;
+       }
+
+       cep = ep->dd_data;
+       cep->csk = csk;
+       cep->chba = hba;
+
+       log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
+               "ep 0x%p, cep 0x%p, csk 0x%p, hba 0x%p,%s.\n",
+               ep, cep, csk, hba, hba->ndev->name);
+       return ep;
+
+release_conn:
+       cxgbi_sock_put(csk);
+       cxgbi_sock_closed(csk);
+err_out:
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(cxgbi_ep_connect);
+
+/*
+ * cxgbi_ep_poll - report whether the endpoint's socket is established.
+ *
+ * Returns 1 when cxgbi_sock_is_established() holds for the endpoint's
+ * socket, 0 otherwise. timeout_ms is not used.
+ */
+int cxgbi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
+{
+       struct cxgbi_endpoint *cep = ep->dd_data;
+       struct cxgbi_sock *sock = cep->csk;
+
+       return cxgbi_sock_is_established(sock) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(cxgbi_ep_poll);
+
+/*
+ * cxgbi_ep_disconnect - tear down an endpoint and shut its socket.
+ *
+ * If the endpoint is bound to an iscsi connection, transmit is suspended
+ * and the socket <-> connection back-pointers are cleared under the
+ * socket's callback lock. The endpoint is then destroyed, the socket is
+ * either actively closed (state >= CTP_ESTABLISHED) or marked closed, and
+ * finally cxgbi_sock_put() is called on it.
+ */
+void cxgbi_ep_disconnect(struct iscsi_endpoint *ep)
+{
+       struct cxgbi_endpoint *cep = ep->dd_data;
+       struct cxgbi_conn *cconn = cep->cconn;
+       /* Read up front: csk is still used after the endpoint is destroyed. */
+       struct cxgbi_sock *csk = cep->csk;
+
+       log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_SOCK,
+               "ep 0x%p, cep 0x%p, cconn 0x%p, csk 0x%p,%u,0x%lx.\n",
+               ep, cep, cconn, csk, csk->state, csk->flags);
+
+       if (cconn && cconn->iconn) {
+               iscsi_suspend_tx(cconn->iconn);
+               /* Unlink socket and connection from each other atomically. */
+               write_lock_bh(&csk->callback_lock);
+               cep->csk->user_data = NULL;
+               cconn->cep = NULL;
+               write_unlock_bh(&csk->callback_lock);
+       }
+       iscsi_destroy_endpoint(ep);
+
+       if (likely(csk->state >= CTP_ESTABLISHED))
+               need_active_close(csk);
+       else
+               cxgbi_sock_closed(csk);
+
+       /* NOTE(review): presumably pairs with cxgbi_sock_get() in
+        * cxgbi_ep_connect() - confirm. */
+       cxgbi_sock_put(csk);
+}
+EXPORT_SYMBOL_GPL(cxgbi_ep_disconnect);
+
+/*
+ * cxgbi_iscsi_init - register an iSCSI transport.
+ *
+ * The result of iscsi_register_transport() is stored through stt. Returns 0
+ * on success, or -ENODEV when registration returned NULL (*stt is then
+ * NULL as well).
+ *
+ * NOTE(review): the debug message prints stt itself (the caller's slot),
+ * not the registered template - confirm that is intended.
+ */
+int cxgbi_iscsi_init(struct iscsi_transport *itp,
+                       struct scsi_transport_template **stt)
+{
+       struct scsi_transport_template *tmpl = iscsi_register_transport(itp);
+
+       *stt = tmpl;
+       if (tmpl) {
+               log_debug(1 << CXGBI_DBG_ISCSI,
+                       "%s, registered iscsi transport 0x%p.\n",
+                       itp->name, stt);
+               return 0;
+       }
+
+       pr_err("unable to register %s transport 0x%p.\n",
+               itp->name, itp);
+       return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(cxgbi_iscsi_init);
+
+/*
+ * cxgbi_iscsi_cleanup - unregister an iSCSI transport.
+ *
+ * Does nothing when *stt is already NULL. Otherwise clears *stt and then
+ * unregisters itp.
+ */
+void cxgbi_iscsi_cleanup(struct iscsi_transport *itp,
+                       struct scsi_transport_template **stt)
+{
+       if (!*stt)
+               return;
+
+       log_debug(1 << CXGBI_DBG_ISCSI,
+               "de-register transport 0x%p, %s, stt 0x%p.\n",
+               itp, itp->name, *stt);
+       *stt = NULL;
+       iscsi_unregister_transport(itp);
+}
+EXPORT_SYMBOL_GPL(cxgbi_iscsi_cleanup);
+
+/*
+ * cxgbi_attr_is_visible - attribute mode for an iSCSI parameter.
+ *
+ * Returns S_IRUGO for the host parameters (param_type ISCSI_HOST_PARAM) and
+ * session/connection parameters (param_type ISCSI_PARAM) listed below, and
+ * 0 for every other parameter or parameter type.
+ */
+umode_t cxgbi_attr_is_visible(int param_type, int param)
+{
+       umode_t mode = 0;
+
+       if (param_type == ISCSI_HOST_PARAM) {
+               switch (param) {
+               case ISCSI_HOST_PARAM_NETDEV_NAME:
+               case ISCSI_HOST_PARAM_HWADDRESS:
+               case ISCSI_HOST_PARAM_IPADDRESS:
+               case ISCSI_HOST_PARAM_INITIATOR_NAME:
+                       mode = S_IRUGO;
+                       break;
+               default:
+                       break;
+               }
+       } else if (param_type == ISCSI_PARAM) {
+               switch (param) {
+               case ISCSI_PARAM_MAX_RECV_DLENGTH:
+               case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+               case ISCSI_PARAM_HDRDGST_EN:
+               case ISCSI_PARAM_DATADGST_EN:
+               case ISCSI_PARAM_CONN_ADDRESS:
+               case ISCSI_PARAM_CONN_PORT:
+               case ISCSI_PARAM_EXP_STATSN:
+               case ISCSI_PARAM_PERSISTENT_ADDRESS:
+               case ISCSI_PARAM_PERSISTENT_PORT:
+               case ISCSI_PARAM_PING_TMO:
+               case ISCSI_PARAM_RECV_TMO:
+               case ISCSI_PARAM_INITIAL_R2T_EN:
+               case ISCSI_PARAM_MAX_R2T:
+               case ISCSI_PARAM_IMM_DATA_EN:
+               case ISCSI_PARAM_FIRST_BURST:
+               case ISCSI_PARAM_MAX_BURST:
+               case ISCSI_PARAM_PDU_INORDER_EN:
+               case ISCSI_PARAM_DATASEQ_INORDER_EN:
+               case ISCSI_PARAM_ERL:
+               case ISCSI_PARAM_TARGET_NAME:
+               case ISCSI_PARAM_TPGT:
+               case ISCSI_PARAM_USERNAME:
+               case ISCSI_PARAM_PASSWORD:
+               case ISCSI_PARAM_USERNAME_IN:
+               case ISCSI_PARAM_PASSWORD_IN:
+               case ISCSI_PARAM_FAST_ABORT:
+               case ISCSI_PARAM_ABORT_TMO:
+               case ISCSI_PARAM_LU_RESET_TMO:
+               case ISCSI_PARAM_TGT_RESET_TMO:
+               case ISCSI_PARAM_IFACE_NAME:
+               case ISCSI_PARAM_INITIATOR_NAME:
+                       mode = S_IRUGO;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return mode;
+}
+EXPORT_SYMBOL_GPL(cxgbi_attr_is_visible);
+
+/*
+ * Module load: derive the widths of the software tag's index and age
+ * fields from ISCSI_ITT_MASK and ISCSI_AGE_MASK (position of the highest
+ * set bit, plus one), log them, then set up the DDP host page size.
+ * Always returns 0.
+ */
+static int __init libcxgbi_init_module(void)
+{
+       sw_tag_age_bits = 1 + __ilog2_u32(ISCSI_AGE_MASK);
+       sw_tag_idx_bits = 1 + __ilog2_u32(ISCSI_ITT_MASK);
+
+       pr_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
+               ISCSI_ITT_MASK, sw_tag_idx_bits,
+               ISCSI_AGE_MASK, sw_tag_age_bits);
+
+       ddp_setup_host_page_size();
+
+       return 0;
+}
+
+/*
+ * Module unload: unregister the cxgbi devices selected by 0xFF.
+ * NOTE(review): 0xFF is presumably a flag mask matching every device
+ * type - confirm against cxgbi_device_unregister_all().
+ */
+static void __exit libcxgbi_exit_module(void)
+{
+       cxgbi_device_unregister_all(0xFF);
+}
+
+/* Register the library's load and unload handlers with the module core. */
+module_init(libcxgbi_init_module);
+module_exit(libcxgbi_exit_module);