X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?a=blobdiff_plain;f=kernel%2Fdrivers%2Fnet%2Fethernet%2Fintel%2Fi40evf%2Fi40e_txrx.c;fp=kernel%2Fdrivers%2Fnet%2Fethernet%2Fintel%2Fi40evf%2Fi40e_txrx.c;h=39db70a597ed65890653c2887ff9bd3406c9d62a;hb=52f993b8e89487ec9ee15a7fb4979e0f09a45b27;hp=47e9a90d6b100d9874c6a6e77e123c7e5ccc7ac2;hpb=c189ccac5702322ed843fe17057035b7222a59b6;p=kvmfornfv.git

diff --git a/kernel/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/kernel/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 47e9a90d6..39db70a59 100644
--- a/kernel/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/kernel/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -51,11 +51,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 					    struct i40e_tx_buffer *tx_buffer)
 {
 	if (tx_buffer->skb) {
-		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-			kfree(tx_buffer->raw_buf);
-		else
-			dev_kfree_skb_any(tx_buffer->skb);
-
+		dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
 			dma_unmap_single(ring->dev,
 					 dma_unmap_addr(tx_buffer, dma),
@@ -67,6 +63,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
 			       dma_unmap_len(tx_buffer, len),
 			       DMA_TO_DEVICE);
 	}
+
+	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+		kfree(tx_buffer->raw_buf);
+
 	tx_buffer->next_to_watch = NULL;
 	tx_buffer->skb = NULL;
 	dma_unmap_len_set(tx_buffer, len, 0);
@@ -245,16 +245,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
-	/* check to see if there are any non-cache aligned descriptors
-	 * waiting to be written back, and kick the hardware to force
-	 * them to be written back in case of napi polling
-	 */
-	if (budget &&
-	    !((i & WB_STRIDE) == WB_STRIDE) &&
-	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
-	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
-		tx_ring->arm_wb = true;
-
 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
 						      tx_ring->queue_index),
 				  total_packets, total_bytes);
@@ -889,31 +879,12 @@ checksum_fail:
 }
 
 /**
- * i40e_rx_hash - returns the hash value from the Rx descriptor
- * @ring: descriptor ring
- * @rx_desc: specific descriptor
- **/
-static inline u32 i40e_rx_hash(struct i40e_ring *ring,
-			       union i40e_rx_desc *rx_desc)
-{
-	const __le64 rss_mask =
-		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
-			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
-
-	if ((ring->netdev->features & NETIF_F_RXHASH) &&
-	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
-		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
-	else
-		return 0;
-}
-
-/**
- * i40e_ptype_to_hash - get a hash type
+ * i40e_ptype_to_htype - get a hash type
  * @ptype: the ptype value from the descriptor
  *
  * Returns a hash type to be used by skb_set_hash
  **/
-static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
+static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
 {
 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
 
@@ -930,6 +901,30 @@ static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
 		return PKT_HASH_TYPE_L2;
 }
 
+/**
+ * i40e_rx_hash - set the hash value in the skb
+ * @ring: descriptor ring
+ * @rx_desc: specific descriptor
+ **/
+static inline void i40e_rx_hash(struct i40e_ring *ring,
+				union i40e_rx_desc *rx_desc,
+				struct sk_buff *skb,
+				u8 rx_ptype)
+{
+	u32 hash;
+	const __le64 rss_mask =
+		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
+			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
+
+	if (ring->netdev->features & NETIF_F_RXHASH)
+		return;
+
+	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
+		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
+		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
+	}
+}
+
 /**
  * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
  * @rx_ring: rx ring to clean
@@ -1071,8 +1066,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
 			continue;
 		}
 
-		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
-			     i40e_ptype_to_hash(rx_ptype));
+		i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 		total_rx_packets++;
@@ -1189,8 +1184,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
 			continue;
 		}
 
-		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
-			     i40e_ptype_to_hash(rx_ptype));
+		i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 		total_rx_packets++;
@@ -1770,6 +1764,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	u32 td_tag = 0;
 	dma_addr_t dma;
 	u16 gso_segs;
+	u16 desc_count = 0;
+	bool tail_bump = true;
+	bool do_rs = false;
 
 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -1810,6 +1807,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 			tx_desc++;
 			i++;
+			desc_count++;
+
 			if (i == tx_ring->count) {
 				tx_desc = I40E_TX_DESC(tx_ring, 0);
 				i = 0;
@@ -1829,6 +1828,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 		tx_desc++;
 		i++;
+		desc_count++;
+
 		if (i == tx_ring->count) {
 			tx_desc = I40E_TX_DESC(tx_ring, 0);
 			i = 0;
@@ -1843,35 +1844,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	/* Place RS bit on last descriptor of any packet that spans across the
-	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
-	 */
 #define WB_STRIDE 0x3
-	if (((i & WB_STRIDE) != WB_STRIDE) &&
-	    (first <= &tx_ring->tx_bi[i]) &&
-	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
-		tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag) |
-			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
-					 I40E_TXD_QW1_CMD_SHIFT);
-	} else {
-		tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag) |
-			cpu_to_le64((u64)I40E_TXD_CMD <<
-					 I40E_TXD_QW1_CMD_SHIFT);
-	}
-
-	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
-						 tx_ring->queue_index),
-			     first->bytecount);
-
-	/* Force memory writes to complete before letting h/w
-	 * know there are new descriptors to fetch. (Only
-	 * applicable for weak-ordered memory model archs,
-	 * such as IA-64).
-	 */
-	wmb();
-
 	/* set next_to_watch value indicating a packet is present */
 	first->next_to_watch = tx_desc;
 
@@ -1881,15 +1854,78 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	tx_ring->next_to_use = i;
 
+	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
+						 tx_ring->queue_index),
+						 first->bytecount);
 	i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* Algorithm to optimize tail and RS bit setting:
+	 * if xmit_more is supported
+	 *	if xmit_more is true
+	 *		do not update tail and do not mark RS bit.
+	 *	if xmit_more is false and last xmit_more was false
+	 *		if every packet spanned less than 4 desc
+	 *			then set RS bit on 4th packet and update tail
+	 *			on every packet
+	 *		else
+	 *			update tail and set RS bit on every packet.
+	 *	if xmit_more is false and last_xmit_more was true
+	 *		update tail and set RS bit.
+	 * else (kernel < 3.18)
+	 *	if every packet spanned less than 4 desc
+	 *		then set RS bit on 4th packet and update tail
+	 *		on every packet
+	 *	else
+	 *		set RS bit on EOP for every packet and update tail
+	 *
+	 * Optimization: wmb to be issued only in case of tail update.
+	 * Also optimize the Descriptor WB path for RS bit with the same
+	 * algorithm.
+	 *
+	 * Note: If there are less than 4 packets
+	 * pending and interrupts were disabled the service task will
+	 * trigger a force WB.
+	 */
+	if (skb->xmit_more &&
+	    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+						    tx_ring->queue_index))) {
+		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+		tail_bump = false;
+	} else if (!skb->xmit_more &&
+		   !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
+						       tx_ring->queue_index)) &&
+		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
+		   (tx_ring->packet_stride < WB_STRIDE) &&
+		   (desc_count < WB_STRIDE)) {
+		tx_ring->packet_stride++;
+	} else {
+		tx_ring->packet_stride = 0;
+		tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
+		do_rs = true;
+	}
+	if (do_rs)
+		tx_ring->packet_stride = 0;
+
+	tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
+						  I40E_TX_DESC_CMD_EOP) <<
+						  I40E_TXD_QW1_CMD_SHIFT);
+
 	/* notify HW of packet */
-	if (!skb->xmit_more ||
-	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-						   tx_ring->queue_index)))
-		writel(i, tx_ring->tail);
-	else
+	if (!tail_bump)
 		prefetchw(tx_desc + 1);
 
+	if (tail_bump) {
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch. (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, tx_ring->tail);
+	}
+
 	return;
 
 dma_error:
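The standalone sketch below restates the tail-bump/RS-bit decision that the new "Algorithm to optimize tail and RS bit setting" comment and code in i40evf_tx_map() describe. It is an illustration only: struct ring_state, tx_tail_rs_decision() and the sample main() are hypothetical stand-ins for the driver's ring state and transmit path; the WB_STRIDE value and the xmit_more/packet_stride/desc_count conditions follow the diff.

/* Sketch of the tail/RS decision added to i40evf_tx_map(); not driver code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WB_STRIDE 0x3

struct ring_state {
	bool last_xmit_more;	/* stand-in for I40E_TXR_FLAGS_LAST_XMIT_MORE_SET */
	uint16_t packet_stride;	/* packets sent since the last RS request */
};

static void tx_tail_rs_decision(struct ring_state *r, bool xmit_more,
				bool queue_stopped, uint16_t desc_count,
				bool *tail_bump, bool *do_rs)
{
	*tail_bump = true;
	*do_rs = false;

	if (xmit_more && !queue_stopped) {
		/* more frames queued behind this one: skip the doorbell */
		r->last_xmit_more = true;
		*tail_bump = false;
	} else if (!xmit_more && !queue_stopped && !r->last_xmit_more &&
		   r->packet_stride < WB_STRIDE && desc_count < WB_STRIDE) {
		/* short packets, nothing deferred: bump tail but hold the
		 * RS bit until the stride of 4 packets is reached
		 */
		r->packet_stride++;
	} else {
		/* deferral pending or stride reached: bump tail and request
		 * a descriptor write-back (RS bit)
		 */
		r->packet_stride = 0;
		r->last_xmit_more = false;
		*do_rs = true;
	}
}

int main(void)
{
	struct ring_state ring = { .last_xmit_more = false, .packet_stride = 0 };
	bool tail_bump, do_rs;

	/* one small 2-descriptor packet, queue running, no xmit_more batching */
	tx_tail_rs_decision(&ring, false, false, 2, &tail_bump, &do_rs);
	printf("tail_bump=%d do_rs=%d stride=%u\n",
	       tail_bump, do_rs, (unsigned)ring.packet_stride);
	return 0;
}

Deferring both the tail write and the RS bit this way means the wmb()/writel() doorbell cost is paid once per batch rather than per descriptor, which is the optimization the comment in the patch calls out.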