These changes are the raw update to the linux-4.4.6-rt14 kernel sources (kvmfornfv.git).
diff --git a/kernel/drivers/net/wireless/iwlwifi/pcie/rx.c b/kernel/drivers/net/wireless/iwlwifi/pcie/rx.c
index 7ff69c6..e06591f 100644
--- a/kernel/drivers/net/wireless/iwlwifi/pcie/rx.c
+++ b/kernel/drivers/net/wireless/iwlwifi/pcie/rx.c
@@ -1,7 +1,7 @@
 /******************************************************************************
  *
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * Portions of this file are derived from the ipw3945 project, as well
  * as portions of the ieee80211 subsystem header files.
  * resets the Rx queue buffers with new memory.
  *
  * The management in the driver is as follows:
- * + A list of pre-allocated SKBs is stored in iwl->rxq->rx_free.  When
- *   iwl->rxq->free_count drops to or below RX_LOW_WATERMARK, work is scheduled
- *   to replenish the iwl->rxq->rx_free.
- * + In iwl_pcie_rx_replenish (scheduled) if 'processed' != 'read' then the
- *   iwl->rxq is replenished and the READ INDEX is updated (updating the
- *   'processed' and 'read' driver indexes as well)
+ * + A list of pre-allocated RBDs is stored in iwl->rxq->rx_free.
+ *   When the interrupt handler is called, the request is processed.
+ *   The page is either stolen - transferred to the upper layer
+ *   or reused - added immediately to the iwl->rxq->rx_free list.
+ * + When the page is stolen - the driver updates the matching queue's used
+ *   count, detaches the RBD and transfers it to the queue used list.
+ *   When there are two used RBDs - they are transferred to the allocator empty
+ *   list. Work is then scheduled for the allocator to start allocating
+ *   eight buffers.
+ *   When another 6 RBDs are used - they are transferred to the allocator
+ *   empty list and the driver tries to claim the pre-allocated buffers and
+ *   add them to iwl->rxq->rx_free. If it fails - it continues to claim them
+ *   until ready.
+ *   When there are 8+ buffers in the free list - either from allocation or from
+ *   8 reused unstolen pages - restock is called to update the FW and indexes.
+ * + In order to make sure the allocator always has RBDs to use for allocation
+ *   the allocator has an initial pool of size num_queues*(8-2) - the maximum
+ *   number of missing RBDs per allocation request (a request is posted with
+ *   2 empty RBDs, there is no guarantee when the other 6 RBDs are supplied).
+ *   The queues supply the recycling of the remaining RBDs.
  * + A received packet is processed and handed to the kernel network stack,
  *   detached from the iwl->rxq.  The driver 'processed' index is updated.
- * + The Host/Firmware iwl->rxq is replenished at irq thread time from the
- *   rx_free list. If there are no allocated buffers in iwl->rxq->rx_free,
+ * + If there are no allocated buffers in iwl->rxq->rx_free,
  *   the READ INDEX is not incremented and iwl->status(RX_STALLED) is set.
  *   If there were enough free buffers and RX_STALLED is set it is cleared.
  *
  *
  * iwl_rxq_alloc()            Allocates rx_free
  * iwl_pcie_rx_replenish()    Replenishes rx_free list from rx_used, and calls
- *                            iwl_pcie_rxq_restock
+ *                            iwl_pcie_rxq_restock.
+ *                            Used only during initialization.
  * iwl_pcie_rxq_restock()     Moves available buffers from rx_free into Rx
  *                            queue, updates firmware pointers, and updates
- *                            the WRITE index.  If insufficient rx_free buffers
- *                            are available, schedules iwl_pcie_rx_replenish
+ *                            the WRITE index.
+ * iwl_pcie_rx_allocator()     Background work for allocating pages.
  *
  * -- enable interrupts --
  * ISR - iwl_rx()             Detach iwl_rx_mem_buffers from pool up to the
  *                            READ INDEX, detaching the SKB from the pool.
  *                            Moves the packet buffer from queue to rx_used.
+ *                            Posts and claims requests to the allocator.
  *                            Calls iwl_pcie_rxq_restock to refill any empty
  *                            slots.
+ *
+ * RBD life-cycle:
+ *
+ * Init:
+ * rxq.pool -> rxq.rx_used -> rxq.rx_free -> rxq.queue
+ *
+ * Regular Receive interrupt:
+ * Page Stolen:
+ * rxq.queue -> rxq.rx_used -> allocator.rbd_empty ->
+ * allocator.rbd_allocated -> rxq.rx_free -> rxq.queue
+ * Page not Stolen:
+ * rxq.queue -> rxq.rx_free -> rxq.queue
  * ...
  *
  */
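
Read as code, the life-cycle above is a short cycle through five lists. The sketch
below is not part of the patch; the enum, array and function names are invented
for illustration, but the list names and transitions mirror the diagram.

/* Stand-alone model of the RBD life-cycle described above (not driver code). */
#include <stdio.h>

enum rbd_list {
        RXQ_QUEUE,              /* rxq.queue - owned by the hardware */
        RXQ_RX_USED,            /* rxq.rx_used - page stolen, RBD needs a new one */
        RBA_EMPTY,              /* rba.rbd_empty - handed to the allocator */
        RBA_ALLOCATED,          /* rba.rbd_allocated - page refilled */
        RXQ_RX_FREE,            /* rxq.rx_free - ready for restock */
};

static const char * const list_name[] = {
        "rxq.queue", "rxq.rx_used", "rba.rbd_empty",
        "rba.rbd_allocated", "rxq.rx_free",
};

/* Where does an RBD go next once the interrupt handler has processed it? */
static enum rbd_list next_list(enum rbd_list cur, int page_stolen)
{
        switch (cur) {
        case RXQ_QUEUE:
                return page_stolen ? RXQ_RX_USED : RXQ_RX_FREE;
        case RXQ_RX_USED:
                return RBA_EMPTY;       /* batched: 2 RBDs post a request, 6 follow */
        case RBA_EMPTY:
                return RBA_ALLOCATED;   /* allocator work refills the page */
        case RBA_ALLOCATED:
                return RXQ_RX_FREE;     /* queue claims 8 refilled RBDs at once */
        case RXQ_RX_FREE:
                return RXQ_QUEUE;       /* restock hands the RBD back to the HW */
        }
        return cur;
}

int main(void)
{
        enum rbd_list l = RXQ_QUEUE;
        int i;

        for (i = 0; i < 5; i++) {       /* one full stolen-page cycle */
                printf("%s -> ", list_name[l]);
                l = next_list(l, 1);
        }
        printf("%s\n", list_name[l]);   /* ends back at rxq.queue */
        return 0;
}
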
@@ -240,10 +267,6 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans)
                rxq->free_count--;
        }
        spin_unlock(&rxq->lock);
-       /* If the pre-allocated buffer pool is dropping low, schedule to
-        * refill it */
-       if (rxq->free_count <= RX_LOW_WATERMARK)
-               schedule_work(&trans_pcie->rx_replenish);
 
        /* If we've added more space for the firmware to place data, tell it.
         * Increment device's write pointer in multiples of 8. */
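
The retained comment above is the key detail of the restock path: the WRITE index
handed to the device only advances in multiples of 8. A minimal stand-alone sketch
of that rounding (assuming the usual round-down-to-8 arithmetic; the helper name is
invented, this is not the driver's code):

/* "Increment device's write pointer in multiples of 8" as arithmetic. */
#include <stdio.h>

static unsigned int round_down_8(unsigned int v)
{
        return v & ~7u;         /* same result as the kernel's round_down(v, 8) */
}

int main(void)
{
        printf("%u %u %u\n", round_down_8(7), round_down_8(8), round_down_8(13));
        /* prints "0 8 8": the device sees the index move only in steps of 8 */
        return 0;
}
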
@@ -254,6 +277,45 @@ static void iwl_pcie_rxq_restock(struct iwl_trans *trans)
        }
 }
 
+/*
+ * iwl_pcie_rx_alloc_page - allocates and returns a page.
+ *
+ */
+static struct page *iwl_pcie_rx_alloc_page(struct iwl_trans *trans,
+                                          gfp_t priority)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_rxq *rxq = &trans_pcie->rxq;
+       struct page *page;
+       gfp_t gfp_mask = priority;
+
+       if (rxq->free_count > RX_LOW_WATERMARK)
+               gfp_mask |= __GFP_NOWARN;
+
+       if (trans_pcie->rx_page_order > 0)
+               gfp_mask |= __GFP_COMP;
+
+       /* Alloc a new receive buffer */
+       page = alloc_pages(gfp_mask, trans_pcie->rx_page_order);
+       if (!page) {
+               if (net_ratelimit())
+                       IWL_DEBUG_INFO(trans, "alloc_pages failed, order: %d\n",
+                                      trans_pcie->rx_page_order);
+               /* Issue an error if the hardware has consumed more than half
+                * of its free buffer list and we don't have enough
+                * pre-allocated buffers.
+                */
+               if (rxq->free_count <= RX_LOW_WATERMARK &&
+                   iwl_rxq_space(rxq) > (RX_QUEUE_SIZE / 2) &&
+                   net_ratelimit())
+                       IWL_CRIT(trans,
+                                "Failed to alloc_pages with GFP_KERNEL. Only %u free buffers remaining.\n",
+                                rxq->free_count);
+               return NULL;
+       }
+       return page;
+}
+
 /*
  * iwl_pcie_rxq_alloc_rbs - allocate a page for each used RBD
  *
@@ -269,7 +331,6 @@ static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority)
        struct iwl_rxq *rxq = &trans_pcie->rxq;
        struct iwl_rx_mem_buffer *rxb;
        struct page *page;
-       gfp_t gfp_mask = priority;
 
        while (1) {
                spin_lock(&rxq->lock);
@@ -279,32 +340,10 @@ static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority)
                }
                spin_unlock(&rxq->lock);
 
-               if (rxq->free_count > RX_LOW_WATERMARK)
-                       gfp_mask |= __GFP_NOWARN;
-
-               if (trans_pcie->rx_page_order > 0)
-                       gfp_mask |= __GFP_COMP;
-
                /* Alloc a new receive buffer */
-               page = alloc_pages(gfp_mask, trans_pcie->rx_page_order);
-               if (!page) {
-                       if (net_ratelimit())
-                               IWL_DEBUG_INFO(trans, "alloc_pages failed, "
-                                          "order: %d\n",
-                                          trans_pcie->rx_page_order);
-
-                       if ((rxq->free_count <= RX_LOW_WATERMARK) &&
-                           net_ratelimit())
-                               IWL_CRIT(trans, "Failed to alloc_pages with %s."
-                                        "Only %u free buffers remaining.\n",
-                                        priority == GFP_ATOMIC ?
-                                        "GFP_ATOMIC" : "GFP_KERNEL",
-                                        rxq->free_count);
-                       /* We don't reschedule replenish work here -- we will
-                        * call the restock method and if it still needs
-                        * more buffers it will schedule replenish */
+               page = iwl_pcie_rx_alloc_page(trans, priority);
+               if (!page)
                        return;
-               }
 
                spin_lock(&rxq->lock);
 
@@ -355,7 +394,7 @@ static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans)
 
        lockdep_assert_held(&rxq->lock);
 
-       for (i = 0; i < RX_FREE_BUFFERS + RX_QUEUE_SIZE; i++) {
+       for (i = 0; i < RX_QUEUE_SIZE; i++) {
                if (!rxq->pool[i].page)
                        continue;
                dma_unmap_page(trans->dev, rxq->pool[i].page_dma,
@@ -372,32 +411,164 @@ static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans)
  * When moving to rx_free an page is allocated for the slot.
  *
  * Also restock the Rx queue via iwl_pcie_rxq_restock.
- * This is called as a scheduled work item (except for during initialization)
+ * This is called only during initialization
  */
-static void iwl_pcie_rx_replenish(struct iwl_trans *trans, gfp_t gfp)
+static void iwl_pcie_rx_replenish(struct iwl_trans *trans)
 {
-       iwl_pcie_rxq_alloc_rbs(trans, gfp);
+       iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL);
 
        iwl_pcie_rxq_restock(trans);
 }
 
-static void iwl_pcie_rx_replenish_work(struct work_struct *data)
+/*
+ * iwl_pcie_rx_allocator - Allocates pages in the background for RX queues
+ *
+ * Allocates 8 pages for each received request.
+ * Called as a scheduled work item.
+ */
+static void iwl_pcie_rx_allocator(struct iwl_trans *trans)
 {
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_rb_allocator *rba = &trans_pcie->rba;
+       struct list_head local_empty;
+       int pending = atomic_xchg(&rba->req_pending, 0);
+
+       IWL_DEBUG_RX(trans, "Pending allocation requests = %d\n", pending);
+
+       /* If we were scheduled - there is at least one request */
+       spin_lock(&rba->lock);
+       /* swap out the rba->rbd_empty to a local list */
+       list_replace_init(&rba->rbd_empty, &local_empty);
+       spin_unlock(&rba->lock);
+
+       while (pending) {
+               int i;
+               struct list_head local_allocated;
+
+               INIT_LIST_HEAD(&local_allocated);
+
+               for (i = 0; i < RX_CLAIM_REQ_ALLOC;) {
+                       struct iwl_rx_mem_buffer *rxb;
+                       struct page *page;
+
+                       /* List should never be empty - each reused RBD is
+                        * returned to the list, and initial pool covers any
+                        * possible gap between the time the page is allocated
+                        * to the time the RBD is added.
+                        */
+                       BUG_ON(list_empty(&local_empty));
+                       /* Get the first rxb from the rbd list */
+                       rxb = list_first_entry(&local_empty,
+                                              struct iwl_rx_mem_buffer, list);
+                       BUG_ON(rxb->page);
+
+                       /* Alloc a new receive buffer */
+                       page = iwl_pcie_rx_alloc_page(trans, GFP_KERNEL);
+                       if (!page)
+                               continue;
+                       rxb->page = page;
+
+                       /* Get physical address of the RB */
+                       rxb->page_dma = dma_map_page(trans->dev, page, 0,
+                                       PAGE_SIZE << trans_pcie->rx_page_order,
+                                       DMA_FROM_DEVICE);
+                       if (dma_mapping_error(trans->dev, rxb->page_dma)) {
+                               rxb->page = NULL;
+                               __free_pages(page, trans_pcie->rx_page_order);
+                               continue;
+                       }
+                       /* dma address must be no more than 36 bits */
+                       BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36));
+                       /* and also 256 byte aligned! */
+                       BUG_ON(rxb->page_dma & DMA_BIT_MASK(8));
+
+                       /* move the allocated entry to the out list */
+                       list_move(&rxb->list, &local_allocated);
+                       i++;
+               }
+
+               pending--;
+               if (!pending) {
+                       pending = atomic_xchg(&rba->req_pending, 0);
+                       IWL_DEBUG_RX(trans,
+                                    "Pending allocation requests = %d\n",
+                                    pending);
+               }
+
+               spin_lock(&rba->lock);
+               /* add the allocated rbds to the allocator allocated list */
+               list_splice_tail(&local_allocated, &rba->rbd_allocated);
+               /* get more empty RBDs for current pending requests */
+               list_splice_tail_init(&rba->rbd_empty, &local_empty);
+               spin_unlock(&rba->lock);
+
+               atomic_inc(&rba->req_ready);
+       }
+
+       spin_lock(&rba->lock);
+       /* return unused rbds to the allocator empty list */
+       list_splice_tail(&local_empty, &rba->rbd_empty);
+       spin_unlock(&rba->lock);
+}
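
The two BUG_ON() checks above encode the constraints spelled out in their comments:
the RB DMA address must fit in 36 bits and be 256-byte aligned. A stand-alone
sketch of the same mask arithmetic, assuming the usual kernel definition
DMA_BIT_MASK(n) == (1ULL << (n)) - 1 (the macro below is a local stand-in, not
the kernel's):

/* Illustration of the DMA address checks in iwl_pcie_rx_allocator(). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BIT_MASK_U64(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
        uint64_t dma = 0xab1234500ULL;          /* example bus address */

        /* fits in 36 bits: nothing above bit 35 may be set */
        assert((dma & ~BIT_MASK_U64(36)) == 0);
        /* 256-byte aligned: the low 8 bits must be zero */
        assert((dma & BIT_MASK_U64(8)) == 0);
        printf("address 0x%llx passes both checks\n",
               (unsigned long long)dma);
        return 0;
}
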
+
+/*
+ * iwl_pcie_rx_allocator_get - Returns the pre-allocated pages
+ *
+ * Called by the queue when it has posted an allocation request and
+ * has freed 8 RBDs in order to restock itself.
+ */
+static int iwl_pcie_rx_allocator_get(struct iwl_trans *trans,
+                                    struct iwl_rx_mem_buffer
+                                    *out[RX_CLAIM_REQ_ALLOC])
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_rb_allocator *rba = &trans_pcie->rba;
+       int i;
+
+       /*
+        * atomic_dec_if_positive returns req_ready - 1 for any scenario.
+        * If req_ready is 0 atomic_dec_if_positive will return -1 and this
+        * function will return -ENOMEM, as there are no ready requests.
+        * atomic_dec_if_positive will perform the *actual* decrement only if
+        * req_ready > 0, i.e. - there are ready requests and the function
+        * hands one request to the caller.
+        */
+       if (atomic_dec_if_positive(&rba->req_ready) < 0)
+               return -ENOMEM;
+
+       spin_lock(&rba->lock);
+       for (i = 0; i < RX_CLAIM_REQ_ALLOC; i++) {
+               /* Get next free Rx buffer, remove it from free list */
+               out[i] = list_first_entry(&rba->rbd_allocated,
+                              struct iwl_rx_mem_buffer, list);
+               list_del(&out[i]->list);
+       }
+       spin_unlock(&rba->lock);
+
+       return 0;
+}
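
req_pending and req_ready form a small producer/consumer handshake: the RX path
posts requests, the allocator work drains them eight pages at a time, and
iwl_pcie_rx_allocator_get() consumes one completed request per call. A minimal
single-threaded sketch of that protocol (plain ints stand in for the atomics and
the function names are invented; not driver code):

/* Sketch of the allocation request handshake. */
#include <stdio.h>

static int req_pending;         /* requests posted by the RX path */
static int req_ready;           /* requests completed by the allocator work */

static void post_request(void)  /* RX path: 2 used RBDs handed over */
{
        req_pending++;          /* atomic_inc(&rba->req_pending) */
}

static void allocator_work(void)
{
        int pending = req_pending;      /* atomic_xchg(&rba->req_pending, 0) */

        req_pending = 0;
        while (pending--) {
                /* ... allocate RX_CLAIM_REQ_ALLOC (8) pages here ... */
                req_ready++;            /* atomic_inc(&rba->req_ready) */
        }
}

static int claim_request(void)  /* RX path: iwl_pcie_rx_allocator_get() */
{
        if (req_ready <= 0)     /* atomic_dec_if_positive() returned < 0 */
                return -1;
        req_ready--;
        return 0;               /* caller now owns 8 refilled RBDs */
}

int main(void)
{
        post_request();
        printf("claim before work: %d\n", claim_request());    /* -1, not ready */
        allocator_work();
        printf("claim after work:  %d\n", claim_request());    /*  0, success */
        return 0;
}
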
+
+static void iwl_pcie_rx_allocator_work(struct work_struct *data)
+{
+       struct iwl_rb_allocator *rba_p =
+               container_of(data, struct iwl_rb_allocator, rx_alloc);
        struct iwl_trans_pcie *trans_pcie =
-           container_of(data, struct iwl_trans_pcie, rx_replenish);
+               container_of(rba_p, struct iwl_trans_pcie, rba);
 
-       iwl_pcie_rx_replenish(trans_pcie->trans, GFP_KERNEL);
+       iwl_pcie_rx_allocator(trans_pcie->trans);
 }
 
 static int iwl_pcie_rx_alloc(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct iwl_rxq *rxq = &trans_pcie->rxq;
+       struct iwl_rb_allocator *rba = &trans_pcie->rba;
        struct device *dev = trans->dev;
 
        memset(&trans_pcie->rxq, 0, sizeof(trans_pcie->rxq));
 
        spin_lock_init(&rxq->lock);
+       spin_lock_init(&rba->lock);
 
        if (WARN_ON(rxq->bd || rxq->rb_stts))
                return -EINVAL;
@@ -487,15 +658,49 @@ static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq)
        INIT_LIST_HEAD(&rxq->rx_free);
        INIT_LIST_HEAD(&rxq->rx_used);
        rxq->free_count = 0;
+       rxq->used_count = 0;
 
-       for (i = 0; i < RX_FREE_BUFFERS + RX_QUEUE_SIZE; i++)
+       for (i = 0; i < RX_QUEUE_SIZE; i++)
                list_add(&rxq->pool[i].list, &rxq->rx_used);
 }
 
+static void iwl_pcie_rx_init_rba(struct iwl_rb_allocator *rba)
+{
+       int i;
+
+       lockdep_assert_held(&rba->lock);
+
+       INIT_LIST_HEAD(&rba->rbd_allocated);
+       INIT_LIST_HEAD(&rba->rbd_empty);
+
+       for (i = 0; i < RX_POOL_SIZE; i++)
+               list_add(&rba->pool[i].list, &rba->rbd_empty);
+}
+
+static void iwl_pcie_rx_free_rba(struct iwl_trans *trans)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_rb_allocator *rba = &trans_pcie->rba;
+       int i;
+
+       lockdep_assert_held(&rba->lock);
+
+       for (i = 0; i < RX_POOL_SIZE; i++) {
+               if (!rba->pool[i].page)
+                       continue;
+               dma_unmap_page(trans->dev, rba->pool[i].page_dma,
+                              PAGE_SIZE << trans_pcie->rx_page_order,
+                              DMA_FROM_DEVICE);
+               __free_pages(rba->pool[i].page, trans_pcie->rx_page_order);
+               rba->pool[i].page = NULL;
+       }
+}
+
 int iwl_pcie_rx_init(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct iwl_rxq *rxq = &trans_pcie->rxq;
+       struct iwl_rb_allocator *rba = &trans_pcie->rba;
        int i, err;
 
        if (!rxq->bd) {
@@ -503,11 +708,21 @@ int iwl_pcie_rx_init(struct iwl_trans *trans)
                if (err)
                        return err;
        }
+       if (!rba->alloc_wq)
+               rba->alloc_wq = alloc_workqueue("rb_allocator",
+                                               WQ_HIGHPRI | WQ_UNBOUND, 1);
+       INIT_WORK(&rba->rx_alloc, iwl_pcie_rx_allocator_work);
+
+       spin_lock(&rba->lock);
+       atomic_set(&rba->req_pending, 0);
+       atomic_set(&rba->req_ready, 0);
+       /* free all first - we might be reconfigured for a different size */
+       iwl_pcie_rx_free_rba(trans);
+       iwl_pcie_rx_init_rba(rba);
+       spin_unlock(&rba->lock);
 
        spin_lock(&rxq->lock);
 
-       INIT_WORK(&trans_pcie->rx_replenish, iwl_pcie_rx_replenish_work);
-
        /* free all first - we might be reconfigured for a different size */
        iwl_pcie_rxq_free_rbs(trans);
        iwl_pcie_rx_init_rxb_lists(rxq);
@@ -522,7 +737,7 @@ int iwl_pcie_rx_init(struct iwl_trans *trans)
        memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts));
        spin_unlock(&rxq->lock);
 
-       iwl_pcie_rx_replenish(trans, GFP_KERNEL);
+       iwl_pcie_rx_replenish(trans);
 
        iwl_pcie_rx_hw_init(trans, rxq);
 
@@ -537,6 +752,7 @@ void iwl_pcie_rx_free(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct iwl_rxq *rxq = &trans_pcie->rxq;
+       struct iwl_rb_allocator *rba = &trans_pcie->rba;
 
        /*if rxq->bd is NULL, it means that nothing has been allocated,
         * exit now */
@@ -545,7 +761,15 @@ void iwl_pcie_rx_free(struct iwl_trans *trans)
                return;
        }
 
-       cancel_work_sync(&trans_pcie->rx_replenish);
+       cancel_work_sync(&rba->rx_alloc);
+       if (rba->alloc_wq) {
+               destroy_workqueue(rba->alloc_wq);
+               rba->alloc_wq = NULL;
+       }
+
+       spin_lock(&rba->lock);
+       iwl_pcie_rx_free_rba(trans);
+       spin_unlock(&rba->lock);
 
        spin_lock(&rxq->lock);
        iwl_pcie_rxq_free_rbs(trans);
@@ -566,8 +790,49 @@ void iwl_pcie_rx_free(struct iwl_trans *trans)
        rxq->rb_stts = NULL;
 }
 
+/*
+ * iwl_pcie_rx_reuse_rbd - Recycle used RBDs
+ *
+ * Called when an RBD can be reused. The RBD is transferred to the allocator.
+ * When there are 2 empty RBDs - a request for allocation is posted
+ */
+static void iwl_pcie_rx_reuse_rbd(struct iwl_trans *trans,
+                                 struct iwl_rx_mem_buffer *rxb,
+                                 struct iwl_rxq *rxq, bool emergency)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_rb_allocator *rba = &trans_pcie->rba;
+
+       /* Move the RBD to the used list, will be moved to allocator in batches
+        * before claiming or posting a request */
+       list_add_tail(&rxb->list, &rxq->rx_used);
+
+       if (unlikely(emergency))
+               return;
+
+       /* Count the allocator owned RBDs */
+       rxq->used_count++;
+
+       /* If we have RX_POST_REQ_ALLOC newly released rx buffers -
+        * issue a request to the allocator. Modulo RX_CLAIM_REQ_ALLOC is
+        * used for the case we failed to claim RX_CLAIM_REQ_ALLOC
+        * afterwards, but we still need to post another request.
+        */
+       if ((rxq->used_count % RX_CLAIM_REQ_ALLOC) == RX_POST_REQ_ALLOC) {
+               /* Move the 2 RBDs to the allocator ownership. Allocator has
+                * another 6 from pool for the request completion */
+               spin_lock(&rba->lock);
+               list_splice_tail_init(&rxq->rx_used, &rba->rbd_empty);
+               spin_unlock(&rba->lock);
+
+               atomic_inc(&rba->req_pending);
+               queue_work(rba->alloc_wq, &rba->rx_alloc);
+       }
+}
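
The modulo condition above is easier to see with numbers. Assuming
RX_POST_REQ_ALLOC is 2 and RX_CLAIM_REQ_ALLOC is 8, as used throughout this patch,
a request is posted as soon as the first 2 RBDs of each batch of 8 reach the
allocator:

/* Which used_count values post an allocation request (stand-alone sketch). */
#include <stdio.h>

#define RX_CLAIM_REQ_ALLOC      8
#define RX_POST_REQ_ALLOC       2

int main(void)
{
        int used_count;

        for (used_count = 1; used_count <= 20; used_count++)
                if ((used_count % RX_CLAIM_REQ_ALLOC) == RX_POST_REQ_ALLOC)
                        printf("post a request at used_count=%d\n", used_count);
        /*
         * prints 2, 10, 18: one request per 8 used RBDs, posted as soon as
         * the first 2 RBDs of the batch have been handed to the allocator
         */
        return 0;
}
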
+
 static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
-                               struct iwl_rx_mem_buffer *rxb)
+                               struct iwl_rx_mem_buffer *rxb,
+                               bool emergency)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct iwl_rxq *rxq = &trans_pcie->rxq;
@@ -583,10 +848,9 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
 
        while (offset + sizeof(u32) + sizeof(struct iwl_cmd_header) < max_len) {
                struct iwl_rx_packet *pkt;
-               struct iwl_device_cmd *cmd;
                u16 sequence;
                bool reclaim;
-               int index, cmd_index, err, len;
+               int index, cmd_index, len;
                struct iwl_rx_cmd_buffer rxcb = {
                        ._offset = offset,
                        ._rx_page_order = trans_pcie->rx_page_order,
@@ -634,12 +898,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
                index = SEQ_TO_INDEX(sequence);
                cmd_index = get_cmd_index(&txq->q, index);
 
-               if (reclaim)
-                       cmd = txq->entries[cmd_index].cmd;
-               else
-                       cmd = NULL;
-
-               err = iwl_op_mode_rx(trans->op_mode, &rxcb, cmd);
+               iwl_op_mode_rx(trans->op_mode, &trans_pcie->napi, &rxcb);
 
                if (reclaim) {
                        kzfree(txq->entries[cmd_index].free_buf);
@@ -657,7 +916,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
                         * iwl_trans_send_cmd()
                         * as we reclaim the driver command queue */
                        if (!rxcb._page_stolen)
-                               iwl_pcie_hcmd_complete(trans, &rxcb, err);
+                               iwl_pcie_hcmd_complete(trans, &rxcb);
                        else
                                IWL_WARN(trans, "Claim null rxb?\n");
                }
@@ -688,13 +947,13 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
                         */
                        __free_pages(rxb->page, trans_pcie->rx_page_order);
                        rxb->page = NULL;
-                       list_add_tail(&rxb->list, &rxq->rx_used);
+                       iwl_pcie_rx_reuse_rbd(trans, rxb, rxq, emergency);
                } else {
                        list_add_tail(&rxb->list, &rxq->rx_free);
                        rxq->free_count++;
                }
        } else
-               list_add_tail(&rxb->list, &rxq->rx_used);
+               iwl_pcie_rx_reuse_rbd(trans, rxb, rxq, emergency);
 }
 
 /*
@@ -704,10 +963,8 @@ static void iwl_pcie_rx_handle(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct iwl_rxq *rxq = &trans_pcie->rxq;
-       u32 r, i;
-       u8 fill_rx = 0;
-       u32 count = 8;
-       int total_empty;
+       u32 r, i, j, count = 0;
+       bool emergency = false;
 
 restart:
        spin_lock(&rxq->lock);
@@ -720,47 +977,95 @@ restart:
        if (i == r)
                IWL_DEBUG_RX(trans, "HW = SW = %d\n", r);
 
-       /* calculate total frames need to be restock after handling RX */
-       total_empty = r - rxq->write_actual;
-       if (total_empty < 0)
-               total_empty += RX_QUEUE_SIZE;
-
-       if (total_empty > (RX_QUEUE_SIZE / 2))
-               fill_rx = 1;
-
        while (i != r) {
                struct iwl_rx_mem_buffer *rxb;
 
+               if (unlikely(rxq->used_count == RX_QUEUE_SIZE / 2))
+                       emergency = true;
+
                rxb = rxq->queue[i];
                rxq->queue[i] = NULL;
 
                IWL_DEBUG_RX(trans, "rxbuf: HW = %d, SW = %d (%p)\n",
                             r, i, rxb);
-               iwl_pcie_rx_handle_rb(trans, rxb);
+               iwl_pcie_rx_handle_rb(trans, rxb, emergency);
 
                i = (i + 1) & RX_QUEUE_MASK;
-               /* If there are a lot of unused frames,
-                * restock the Rx queue so ucode wont assert. */
-               if (fill_rx) {
+
+               /* If we have RX_CLAIM_REQ_ALLOC released rx buffers -
+                * try to claim the pre-allocated buffers from the allocator */
+               if (rxq->used_count >= RX_CLAIM_REQ_ALLOC) {
+                       struct iwl_rb_allocator *rba = &trans_pcie->rba;
+                       struct iwl_rx_mem_buffer *out[RX_CLAIM_REQ_ALLOC];
+
+                       if (rxq->used_count % RX_CLAIM_REQ_ALLOC == 0 &&
+                           !emergency) {
+                               /* Add the remaining 6 empty RBDs
+                                * for allocator use
+                                */
+                               spin_lock(&rba->lock);
+                               list_splice_tail_init(&rxq->rx_used,
+                                                     &rba->rbd_empty);
+                               spin_unlock(&rba->lock);
+                       }
+
+                       /* If not ready - continue, will try to reclaim later.
+                        * No need to reschedule work - allocator exits only on
+                        * success */
+                       if (!iwl_pcie_rx_allocator_get(trans, out)) {
+                               /* If success - then RX_CLAIM_REQ_ALLOC
+                                * buffers were retrieved and should be added
+                                * to free list */
+                               rxq->used_count -= RX_CLAIM_REQ_ALLOC;
+                               for (j = 0; j < RX_CLAIM_REQ_ALLOC; j++) {
+                                       list_add_tail(&out[j]->list,
+                                                     &rxq->rx_free);
+                                       rxq->free_count++;
+                               }
+                       }
+               }
+               if (emergency) {
                        count++;
-                       if (count >= 8) {
-                               rxq->read = i;
-                               spin_unlock(&rxq->lock);
-                               iwl_pcie_rx_replenish(trans, GFP_ATOMIC);
+                       if (count == 8) {
                                count = 0;
-                               goto restart;
+                               if (rxq->used_count < RX_QUEUE_SIZE / 3)
+                                       emergency = false;
+                               spin_unlock(&rxq->lock);
+                               iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC);
+                               spin_lock(&rxq->lock);
                        }
                }
+               /* handle restock for three cases, can be all of them at once:
+                * - we just pulled buffers from the allocator
+                * - we have 8+ unstolen pages accumulated
+                * - we are in emergency and allocated buffers
+                */
+               if (rxq->free_count >=  RX_CLAIM_REQ_ALLOC) {
+                       rxq->read = i;
+                       spin_unlock(&rxq->lock);
+                       iwl_pcie_rxq_restock(trans);
+                       goto restart;
+               }
        }
 
        /* Backtrack one entry */
        rxq->read = i;
        spin_unlock(&rxq->lock);
 
-       if (fill_rx)
-               iwl_pcie_rx_replenish(trans, GFP_ATOMIC);
-       else
-               iwl_pcie_rxq_restock(trans);
+       /*
+        * handle a case where in emergency there are some unallocated RBDs.
+        * those RBDs are in the used list, but are not tracked by the queue's
+        * used_count which counts allocator owned RBDs.
+        * unallocated emergency RBDs must be allocated on exit, otherwise
+        * when called again the function may not be in emergency mode and
+        * they will be handed to the allocator with no tracking in the RBD
+        * allocator counters, which will lead to them never being claimed back
+        * by the queue.
+        * by allocating them here, they are now in the queue free list, and
+        * will be restocked by the next call of iwl_pcie_rxq_restock.
+        */
+       if (unlikely(emergency && count))
+               iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC);
 
        if (trans_pcie->napi.poll)
                napi_gro_flush(&trans_pcie->napi, false);
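
For the emergency path above, the thresholds work out as follows. RX_QUEUE_SIZE is
taken to be 256 here, which is an assumption of this sketch rather than something
visible in the diff:

/* Emergency-mode thresholds in iwl_pcie_rx_handle() (stand-alone sketch). */
#include <stdio.h>

#define RX_QUEUE_SIZE   256

int main(void)
{
        /* enter emergency: half the queue is owned by the allocator */
        printf("enter emergency at used_count == %d\n", RX_QUEUE_SIZE / 2);
        /* leave emergency: pressure dropped below a third of the queue */
        printf("leave emergency once used_count < %d\n", RX_QUEUE_SIZE / 3);
        /*
         * while in emergency, every 8 handled RBDs trigger a direct
         * GFP_ATOMIC allocation instead of waiting for the allocator work
         */
        return 0;
}
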
@@ -772,9 +1077,11 @@ restart:
 static void iwl_pcie_irq_handle_error(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       int i;
 
        /* W/A for WiFi/WiMAX coex and WiMAX own the RF */
        if (trans->cfg->internal_wimax_coex &&
+           !trans->cfg->apmg_not_supported &&
            (!(iwl_read_prph(trans, APMG_CLK_CTRL_REG) &
                             APMS_CLK_VAL_MRB_FUNC_MODE) ||
             (iwl_read_prph(trans, APMG_PS_CTRL_REG) &
@@ -794,6 +1101,9 @@ static void iwl_pcie_irq_handle_error(struct iwl_trans *trans)
        iwl_trans_fw_error(trans);
        local_bh_enable();
 
+       for (i = 0; i < trans->cfg->base_params->num_of_queues; i++)
+               del_timer(&trans_pcie->txq[i].stuck_timer);
+
        clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
        wake_up(&trans_pcie->wait_command_queue);
 }
@@ -1002,7 +1312,9 @@ irqreturn_t iwl_pcie_irq_handler(int irq, void *dev_id)
 
                isr_stats->rfkill++;
 
+               mutex_lock(&trans_pcie->mutex);
                iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+               mutex_unlock(&trans_pcie->mutex);
                if (hw_rfkill) {
                        set_bit(STATUS_RFKILL, &trans->status);
                        if (test_and_clear_bit(STATUS_SYNC_HCMD_ACTIVE,
@@ -1194,8 +1506,9 @@ void iwl_pcie_reset_ict(struct iwl_trans *trans)
 
        val = trans_pcie->ict_tbl_dma >> ICT_SHIFT;
 
-       val |= CSR_DRAM_INT_TBL_ENABLE;
-       val |= CSR_DRAM_INIT_TBL_WRAP_CHECK;
+       val |= CSR_DRAM_INT_TBL_ENABLE |
+              CSR_DRAM_INIT_TBL_WRAP_CHECK |
+              CSR_DRAM_INIT_TBL_WRITE_POINTER;
 
        IWL_DEBUG_ISR(trans, "CSR_DRAM_INT_TBL_REG =0x%x\n", val);