These changes are the raw update of the kernel sources to linux-4.4.6-rt14.
[kvmfornfv.git] / kernel / net / rds / ib.h
index c36d713..b3fdebb 100644 (file)
@@ -9,8 +9,11 @@
 #include "rds.h"
 #include "rdma_transport.h"
 
-#define RDS_FMR_SIZE                   256
-#define RDS_FMR_POOL_SIZE              8192
+#define RDS_FMR_1M_POOL_SIZE           (8192 / 2)
+#define RDS_FMR_1M_MSG_SIZE            256
+#define RDS_FMR_8K_MSG_SIZE            2
+#define RDS_MR_8K_SCALE                        (256 / (RDS_FMR_8K_MSG_SIZE + 1))
+#define RDS_FMR_8K_POOL_SIZE           (RDS_MR_8K_SCALE * (8192 / 2))
 
 #define RDS_IB_MAX_SGE                 8
 #define RDS_IB_RECV_SGE                2
@@ -24,6 +27,9 @@
 
 #define RDS_IB_RECYCLE_BATCH_COUNT     32
 
+#define RDS_IB_WC_MAX                  32
+#define RDS_IB_SEND_OP                 BIT_ULL(63)
+
 extern struct rw_semaphore rds_ib_devices_lock;
 extern struct list_head rds_ib_devices;
 
@@ -69,7 +75,11 @@ struct rds_ib_connect_private {
 
 struct rds_ib_send_work {
        void                    *s_op;
-       struct ib_send_wr       s_wr;
+       union {
+               struct ib_send_wr       s_wr;
+               struct ib_rdma_wr       s_rdma_wr;
+               struct ib_atomic_wr     s_atomic_wr;
+       };
        struct ib_sge           s_sge[RDS_IB_MAX_SGE];
        unsigned long           s_queued;
 };
@@ -89,6 +99,20 @@ struct rds_ib_work_ring {
        atomic_t        w_free_ctr;
 };
 
+/* Rings are posted with all the allocations they'll need to queue the
+ * incoming message to the receiving socket so this can't fail.
+ * All fragments start with a header, so we can make sure we're not receiving
+ * garbage, and we can tell a small 8 byte fragment from an ACK frame.
+ */
+struct rds_ib_ack_state {
+       u64             ack_next;
+       u64             ack_recv;
+       unsigned int    ack_required:1;
+       unsigned int    ack_next_valid:1;
+       unsigned int    ack_recv_valid:1;
+};
+
+
 struct rds_ib_device;
 
 struct rds_ib_connection {
@@ -100,9 +124,14 @@ struct rds_ib_connection {
        /* alphabet soup, IBTA style */
        struct rdma_cm_id       *i_cm_id;
        struct ib_pd            *i_pd;
-       struct ib_mr            *i_mr;
        struct ib_cq            *i_send_cq;
        struct ib_cq            *i_recv_cq;
+       struct ib_wc            i_send_wc[RDS_IB_WC_MAX];
+       struct ib_wc            i_recv_wc[RDS_IB_WC_MAX];
+
+       /* interrupt handling */
+       struct tasklet_struct   i_send_tasklet;
+       struct tasklet_struct   i_recv_tasklet;
 
        /* tx */
        struct rds_ib_work_ring i_send_ring;
@@ -113,7 +142,6 @@ struct rds_ib_connection {
        atomic_t                i_signaled_sends;
 
        /* rx */
-       struct tasklet_struct   i_recv_tasklet;
        struct mutex            i_recv_mutex;
        struct rds_ib_work_ring i_recv_ring;
        struct rds_ib_incoming  *i_ibinc;
@@ -165,6 +193,12 @@ struct rds_ib_connection {
 struct rds_ib_ipaddr {
        struct list_head        list;
        __be32                  ipaddr;
+       struct rcu_head         rcu;
+};
+
+enum {
+       RDS_IB_MR_8K_POOL,
+       RDS_IB_MR_1M_POOL,
 };
 
 struct rds_ib_device {
@@ -173,10 +207,12 @@ struct rds_ib_device {
        struct list_head        conn_list;
        struct ib_device        *dev;
        struct ib_pd            *pd;
-       struct ib_mr            *mr;
-       struct rds_ib_mr_pool   *mr_pool;
-       unsigned int            fmr_max_remaps;
        unsigned int            max_fmrs;
+       struct rds_ib_mr_pool   *mr_1m_pool;
+       struct rds_ib_mr_pool   *mr_8k_pool;
+       unsigned int            fmr_max_remaps;
+       unsigned int            max_8k_fmrs;
+       unsigned int            max_1m_fmrs;
        int                     max_sge;
        unsigned int            max_wrs;
        unsigned int            max_initiator_depth;
@@ -199,14 +235,14 @@ struct rds_ib_device {
 struct rds_ib_statistics {
        uint64_t        s_ib_connect_raced;
        uint64_t        s_ib_listen_closed_stale;
-       uint64_t        s_ib_tx_cq_call;
+       uint64_t        s_ib_evt_handler_call;
+       uint64_t        s_ib_tasklet_call;
        uint64_t        s_ib_tx_cq_event;
        uint64_t        s_ib_tx_ring_full;
        uint64_t        s_ib_tx_throttle;
        uint64_t        s_ib_tx_sg_mapping_failure;
        uint64_t        s_ib_tx_stalled;
        uint64_t        s_ib_tx_credit_updates;
-       uint64_t        s_ib_rx_cq_call;
        uint64_t        s_ib_rx_cq_event;
        uint64_t        s_ib_rx_ring_empty;
        uint64_t        s_ib_rx_refill_from_cq;
@@ -218,12 +254,18 @@ struct rds_ib_statistics {
        uint64_t        s_ib_ack_send_delayed;
        uint64_t        s_ib_ack_send_piggybacked;
        uint64_t        s_ib_ack_received;
-       uint64_t        s_ib_rdma_mr_alloc;
-       uint64_t        s_ib_rdma_mr_free;
-       uint64_t        s_ib_rdma_mr_used;
-       uint64_t        s_ib_rdma_mr_pool_flush;
-       uint64_t        s_ib_rdma_mr_pool_wait;
-       uint64_t        s_ib_rdma_mr_pool_depleted;
+       uint64_t        s_ib_rdma_mr_8k_alloc;
+       uint64_t        s_ib_rdma_mr_8k_free;
+       uint64_t        s_ib_rdma_mr_8k_used;
+       uint64_t        s_ib_rdma_mr_8k_pool_flush;
+       uint64_t        s_ib_rdma_mr_8k_pool_wait;
+       uint64_t        s_ib_rdma_mr_8k_pool_depleted;
+       uint64_t        s_ib_rdma_mr_1m_alloc;
+       uint64_t        s_ib_rdma_mr_1m_free;
+       uint64_t        s_ib_rdma_mr_1m_used;
+       uint64_t        s_ib_rdma_mr_1m_pool_flush;
+       uint64_t        s_ib_rdma_mr_1m_pool_wait;
+       uint64_t        s_ib_rdma_mr_1m_pool_depleted;
        uint64_t        s_ib_atomic_cswp;
        uint64_t        s_ib_atomic_fadd;
 };
@@ -235,28 +277,34 @@ extern struct workqueue_struct *rds_ib_wq;
  * doesn't define it.
  */
 static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
-               struct scatterlist *sg, unsigned int sg_dma_len, int direction)
+                                             struct scatterlist *sglist,
+                                             unsigned int sg_dma_len,
+                                             int direction)
 {
+       struct scatterlist *sg;
        unsigned int i;
 
-       for (i = 0; i < sg_dma_len; ++i) {
+       for_each_sg(sglist, sg, sg_dma_len, i) {
                ib_dma_sync_single_for_cpu(dev,
-                               ib_sg_dma_address(dev, &sg[i]),
-                               ib_sg_dma_len(dev, &sg[i]),
+                               ib_sg_dma_address(dev, sg),
+                               ib_sg_dma_len(dev, sg),
                                direction);
        }
 }
 #define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
 
 static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
-               struct scatterlist *sg, unsigned int sg_dma_len, int direction)
+                                                struct scatterlist *sglist,
+                                                unsigned int sg_dma_len,
+                                                int direction)
 {
+       struct scatterlist *sg;
        unsigned int i;
 
-       for (i = 0; i < sg_dma_len; ++i) {
+       for_each_sg(sglist, sg, sg_dma_len, i) {
                ib_dma_sync_single_for_device(dev,
-                               ib_sg_dma_address(dev, &sg[i]),
-                               ib_sg_dma_len(dev, &sg[i]),
+                               ib_sg_dma_address(dev, sg),
+                               ib_sg_dma_len(dev, sg),
                                direction);
        }
 }
@@ -269,7 +317,8 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
 extern struct ib_client rds_ib_client;
 
-extern unsigned int fmr_message_size;
+extern unsigned int rds_ib_fmr_1m_pool_size;
+extern unsigned int rds_ib_fmr_8k_pool_size;
 extern unsigned int rds_ib_retry_count;
 
 extern spinlock_t ib_nodev_conns_lock;
@@ -299,7 +348,8 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
 void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
 void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
 void rds_ib_destroy_nodev_conns(void);
-struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
+struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
+                                            int npages);
 void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -307,6 +357,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 void rds_ib_sync_mr(void *trans_private, int dir);
 void rds_ib_free_mr(void *trans_private, int invalidate);
 void rds_ib_flush_mrs(void);
+int rds_ib_fmr_init(void);
+void rds_ib_fmr_exit(void);
 
 /* ib_recv.c */
 int rds_ib_recv_init(void);
@@ -314,10 +366,11 @@ void rds_ib_recv_exit(void);
 int rds_ib_recv(struct rds_connection *conn);
 int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
 void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
-void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
+void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
 void rds_ib_inc_free(struct rds_incoming *inc);
 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
-void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
+                            struct rds_ib_ack_state *state);
 void rds_ib_recv_tasklet_fn(unsigned long data);
 void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
 void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
@@ -325,6 +378,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
 void rds_ib_attempt_ack(struct rds_ib_connection *ic);
 void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
 u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);
+void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required);
 
 /* ib_ring.c */
 void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
@@ -339,11 +393,10 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
 extern wait_queue_head_t rds_ib_ring_empty_wait;
 
 /* ib_send.c */
-char *rds_ib_wc_status_str(enum ib_wc_status status);
 void rds_ib_xmit_complete(struct rds_connection *conn);
 int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                unsigned int hdr_off, unsigned int sg, unsigned int off);
-void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
 void rds_ib_send_init_ring(struct rds_ib_connection *ic);
 void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
 int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);