These changes are the raw update to linux-4.4.6-rt14. Kernel sources
diff --git a/kernel/block/blk-core.c b/kernel/block/blk-core.c
index 4e7dded..2f7afb9 100644
 #include <linux/delay.h>
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
+#include <linux/blk-cgroup.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
 
 #include "blk.h"
-#include "blk-cgroup.h"
 #include "blk-mq.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
@@ -63,6 +63,31 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
 
+static void blk_clear_congested(struct request_list *rl, int sync)
+{
+#ifdef CONFIG_CGROUP_WRITEBACK
+       clear_wb_congested(rl->blkg->wb_congested, sync);
+#else
+       /*
+        * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't
+        * flip its congestion state for events on other blkcgs.
+        */
+       if (rl == &rl->q->root_rl)
+               clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
+#endif
+}
+
+static void blk_set_congested(struct request_list *rl, int sync)
+{
+#ifdef CONFIG_CGROUP_WRITEBACK
+       set_wb_congested(rl->blkg->wb_congested, sync);
+#else
+       /* see blk_clear_congested() */
+       if (rl == &rl->q->root_rl)
+               set_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
+#endif
+}
+
 void blk_queue_congestion_threshold(struct request_queue *q)
 {
        int nr;
@@ -121,18 +146,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
                          unsigned int nbytes, int error)
 {
        if (error)
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
-       else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-               error = -EIO;
+               bio->bi_error = error;
 
        if (unlikely(rq->cmd_flags & REQ_QUIET))
-               set_bit(BIO_QUIET, &bio->bi_flags);
+               bio_set_flag(bio, BIO_QUIET);
 
        bio_advance(bio, nbytes);
 
        /* don't actually finish bio if it's part of flush sequence */
        if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
-               bio_endio(bio, error);
+               bio_endio(bio);
 }
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -186,6 +209,22 @@ void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 }
 EXPORT_SYMBOL(blk_delay_queue);
 
+/**
+ * blk_start_queue_async - asynchronously restart a previously stopped queue
+ * @q:    The &struct request_queue in question
+ *
+ * Description:
+ *   blk_start_queue_async() will clear the stop flag on the queue, and
+ *   ensure that the request_fn for the queue is run from an async
+ *   context.
+ **/
+void blk_start_queue_async(struct request_queue *q)
+{
+       queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+       blk_run_queue_async(q);
+}
+EXPORT_SYMBOL(blk_start_queue_async);
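
A rough usage sketch (hypothetical driver helper, not part of this patch): a driver that stopped its queue under the queue lock can restart it from a completion path without running the request_fn synchronously:

	/* Hypothetical helper; assumes q->queue_lock protects the stopped state. */
	static void example_restart_queue(struct request_queue *q)
	{
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		blk_start_queue_async(q);	/* clear QUEUE_FLAG_STOPPED, kick kblockd */
		spin_unlock_irqrestore(q->queue_lock, flags);
	}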
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -288,6 +327,7 @@ inline void __blk_run_queue_uncond(struct request_queue *q)
        q->request_fn(q);
        q->request_fn_active--;
 }
+EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
 
 /**
  * __blk_run_queue - run a single device queue
@@ -533,29 +573,30 @@ void blk_cleanup_queue(struct request_queue *q)
         * Drain all requests queued before DYING marking. Set DEAD flag to
         * prevent that q->request_fn() gets invoked after draining finished.
         */
-       if (q->mq_ops) {
-               blk_mq_freeze_queue(q);
-               spin_lock_irq(lock);
-       } else {
-               spin_lock_irq(lock);
+       blk_freeze_queue(q);
+       spin_lock_irq(lock);
+       if (!q->mq_ops)
                __blk_drain_queue(q, true);
-       }
        queue_flag_set(QUEUE_FLAG_DEAD, q);
        spin_unlock_irq(lock);
 
+       /* for synchronous bio-based driver finish in-flight integrity i/o */
+       blk_flush_integrity();
+
        /* @q won't process any more request, flush async actions */
        del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
        blk_sync_queue(q);
 
        if (q->mq_ops)
                blk_mq_free_queue(q);
+       percpu_ref_exit(&q->q_usage_counter);
 
        spin_lock_irq(lock);
        if (q->queue_lock != &q->__queue_lock)
                q->queue_lock = &q->__queue_lock;
        spin_unlock_irq(lock);
 
-       bdi_destroy(&q->backing_dev_info);
+       bdi_unregister(&q->backing_dev_info);
 
        /* @q is and will stay empty, shutdown and put */
        blk_put_queue(q);
@@ -608,6 +649,40 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
+int blk_queue_enter(struct request_queue *q, gfp_t gfp)
+{
+       while (true) {
+               int ret;
+
+               if (percpu_ref_tryget_live(&q->q_usage_counter))
+                       return 0;
+
+               if (!gfpflags_allow_blocking(gfp))
+                       return -EBUSY;
+
+               ret = swait_event_interruptible(q->mq_freeze_wq,
+                               !atomic_read(&q->mq_freeze_depth) ||
+                               blk_queue_dying(q));
+               if (blk_queue_dying(q))
+                       return -ENODEV;
+               if (ret)
+                       return ret;
+       }
+}
+
+void blk_queue_exit(struct request_queue *q)
+{
+       percpu_ref_put(&q->q_usage_counter);
+}
+
+static void blk_queue_usage_counter_release(struct percpu_ref *ref)
+{
+       struct request_queue *q =
+               container_of(ref, struct request_queue, q_usage_counter);
+
+       swake_up_all(&q->mq_freeze_wq);
+}
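
A minimal caller sketch (hypothetical helper, not part of this patch): blk_queue_enter() pins the queue's usage counter so the queue cannot be frozen or torn down while a bio is in flight, and blk_queue_exit() drops that reference:

	static int example_issue(struct request_queue *q, struct bio *bio)
	{
		int ret = blk_queue_enter(q, GFP_NOIO);	/* may sleep with this mask */

		if (ret)			/* -EBUSY, -ENODEV or -ERESTARTSYS */
			return ret;

		generic_make_request(bio);	/* @q stays live until the exit below */
		blk_queue_exit(q);
		return 0;
	}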
+
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
        struct request_queue *q;
@@ -622,16 +697,19 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (q->id < 0)
                goto fail_q;
 
+       q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+       if (!q->bio_split)
+               goto fail_id;
+
        q->backing_dev_info.ra_pages =
                        (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-       q->backing_dev_info.state = 0;
-       q->backing_dev_info.capabilities = 0;
+       q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK;
        q->backing_dev_info.name = "block";
        q->node = node_id;
 
        err = bdi_init(&q->backing_dev_info);
        if (err)
-               goto fail_id;
+               goto fail_split;
 
        setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
                    laptop_mode_timer_fn, (unsigned long) q);
@@ -664,15 +742,28 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        q->bypass_depth = 1;
        __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
 
-       init_swait_head(&q->mq_freeze_wq);
+       init_swait_queue_head(&q->mq_freeze_wq);
 
-       if (blkcg_init_queue(q))
+       /*
+        * Init percpu_ref in atomic mode so that it's faster to shutdown.
+        * See blk_register_queue() for details.
+        */
+       if (percpu_ref_init(&q->q_usage_counter,
+                               blk_queue_usage_counter_release,
+                               PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
                goto fail_bdi;
 
+       if (blkcg_init_queue(q))
+               goto fail_ref;
+
        return q;
 
+fail_ref:
+       percpu_ref_exit(&q->q_usage_counter);
 fail_bdi:
        bdi_destroy(&q->backing_dev_info);
+fail_split:
+       bioset_free(q->bio_split);
 fail_id:
        ida_simple_remove(&blk_queue_ida, q->id);
 fail_q:
@@ -737,7 +828,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
-static void blk_queue_bio(struct request_queue *q, struct bio *bio);
+static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
@@ -848,13 +939,8 @@ static void __freed_request(struct request_list *rl, int sync)
 {
        struct request_queue *q = rl->q;
 
-       /*
-        * bdi isn't aware of blkcg yet.  As all async IOs end up root
-        * blkcg anyway, just use root blkcg state.
-        */
-       if (rl == &q->root_rl &&
-           rl->count[sync] < queue_congestion_off_threshold(q))
-               blk_clear_queue_congested(q, sync);
+       if (rl->count[sync] < queue_congestion_off_threshold(q))
+               blk_clear_congested(rl, sync);
 
        if (rl->count[sync] + 1 <= q->nr_requests) {
                if (waitqueue_active(&rl->wait[sync]))
@@ -887,25 +973,25 @@ static void freed_request(struct request_list *rl, unsigned int flags)
 int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 {
        struct request_list *rl;
+       int on_thresh, off_thresh;
 
        spin_lock_irq(q->queue_lock);
        q->nr_requests = nr;
        blk_queue_congestion_threshold(q);
+       on_thresh = queue_congestion_on_threshold(q);
+       off_thresh = queue_congestion_off_threshold(q);
 
-       /* congestion isn't cgroup aware and follows root blkcg for now */
-       rl = &q->root_rl;
-
-       if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
-               blk_set_queue_congested(q, BLK_RW_SYNC);
-       else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
-               blk_clear_queue_congested(q, BLK_RW_SYNC);
+       blk_queue_for_each_rl(rl, q) {
+               if (rl->count[BLK_RW_SYNC] >= on_thresh)
+                       blk_set_congested(rl, BLK_RW_SYNC);
+               else if (rl->count[BLK_RW_SYNC] < off_thresh)
+                       blk_clear_congested(rl, BLK_RW_SYNC);
 
-       if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
-               blk_set_queue_congested(q, BLK_RW_ASYNC);
-       else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
-               blk_clear_queue_congested(q, BLK_RW_ASYNC);
+               if (rl->count[BLK_RW_ASYNC] >= on_thresh)
+                       blk_set_congested(rl, BLK_RW_ASYNC);
+               else if (rl->count[BLK_RW_ASYNC] < off_thresh)
+                       blk_clear_congested(rl, BLK_RW_ASYNC);
 
-       blk_queue_for_each_rl(rl, q) {
                if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
                        blk_set_rl_full(rl, BLK_RW_SYNC);
                } else {
@@ -1015,12 +1101,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
                                }
                        }
                }
-               /*
-                * bdi isn't aware of blkcg yet.  As all async IOs end up
-                * root blkcg anyway, just use root blkcg state.
-                */
-               if (rl == &q->root_rl)
-                       blk_set_queue_congested(q, is_sync);
+               blk_set_congested(rl, is_sync);
        }
 
        /*
@@ -1144,8 +1225,8 @@ rq_starved:
  * @bio: bio to allocate request for (can be %NULL)
  * @gfp_mask: allocation mask
  *
- * Get a free request from @q.  If %__GFP_WAIT is set in @gfp_mask, this
- * function keeps retrying under memory pressure and fails iff @q is dead.
+ * Get a free request from @q.  If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
+ * this function keeps retrying under memory pressure and fails iff @q is dead.
  *
  * Must be called with @q->queue_lock held and,
  * Returns ERR_PTR on failure, with @q->queue_lock held.
@@ -1165,7 +1246,7 @@ retry:
        if (!IS_ERR(rq))
                return rq;
 
-       if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
+       if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
                blk_put_rl(rl);
                return rq;
        }
@@ -1243,11 +1324,11 @@ EXPORT_SYMBOL(blk_get_request);
  * BUG.
  *
  * WARNING: When allocating/cloning a bio-chain, careful consideration should be
- * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for
- * anything but the first bio in the chain. Otherwise you risk waiting for IO
- * completion of a bio that hasn't been submitted yet, thus resulting in a
- * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead
- * of bio_alloc(), as that avoids the mempool deadlock.
+ * given to how you allocate bios. In particular, you cannot use
+ * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise
+ * you risk waiting for IO completion of a bio that hasn't been submitted yet,
+ * thus resulting in a deadlock. Alternatively bios should be allocated using
+ * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock.
  * If possible a big IO should be split into smaller parts when allocation
  * fails. Partial allocation should not be an error, or you risk a live-lock.
  */
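
A hedged illustration of that rule (hypothetical helper, not from this patch): only the first bio in a chain comes from the bio_alloc() mempool with a direct-reclaim mask; later bios use bio_kmalloc() so they never wait on the mempool:

	static struct bio *example_alloc_chain(unsigned int nr_vecs)
	{
		struct bio *first, *second;

		first = bio_alloc(GFP_NOIO, nr_vecs);		/* may block in the mempool */
		if (!first)
			return NULL;

		second = bio_kmalloc(GFP_NOIO, nr_vecs);	/* plain kmalloc, no mempool */
		if (!second) {
			bio_put(first);
			return NULL;
		}

		bio_chain(second, first);	/* @first completes only after @second */
		return first;
	}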
@@ -1513,6 +1594,9 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
  * @q: request_queue new bio is being queued at
  * @bio: new bio being queued
  * @request_count: out parameter for number of traversed plugged requests
+ * @same_queue_rq: pointer to &struct request that gets filled in when
+ * another request associated with @q is found on the plug list
+ * (optional, may be %NULL)
  *
  * Determine whether @bio being queued on @q can be merged with a request
  * on %current's plugged list.  Returns %true if merge was successful,
@@ -1528,7 +1612,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
  * Caller must ensure !blk_queue_nomerges(q) beforehand.
  */
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-                           unsigned int *request_count)
+                           unsigned int *request_count,
+                           struct request **same_queue_rq)
 {
        struct blk_plug *plug;
        struct request *rq;
@@ -1548,8 +1633,16 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
        list_for_each_entry_reverse(rq, plug_list, queuelist) {
                int el_ret;
 
-               if (rq->q == q)
+               if (rq->q == q) {
                        (*request_count)++;
+                       /*
+                        * Only blk-mq multiple hardware queues case checks the
+                        * rq in the same queue, there should be only one such
+                        * rq in a queue
+                        **/
+                       if (same_queue_rq)
+                               *same_queue_rq = rq;
+               }
 
                if (rq->q != q || !blk_rq_merge_ok(rq, bio))
                        continue;
@@ -1569,6 +1662,30 @@ out:
        return ret;
 }
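
A hypothetical caller sketch for the new @same_queue_rq parameter (the real consumer is the blk-mq submission path):

	static blk_qc_t example_submit_with_merge(struct request_queue *q,
						  struct bio *bio)
	{
		unsigned int request_count = 0;
		struct request *same_queue_rq = NULL;

		if (!blk_queue_nomerges(q) &&
		    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
			return BLK_QC_T_NONE;	/* bio merged into a plugged request */

		/*
		 * Otherwise allocate and issue a request; if set, same_queue_rq
		 * points at the one plugged request already queued for @q.
		 */
		return BLK_QC_T_NONE;
	}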
 
+unsigned int blk_plug_queued_count(struct request_queue *q)
+{
+       struct blk_plug *plug;
+       struct request *rq;
+       struct list_head *plug_list;
+       unsigned int ret = 0;
+
+       plug = current->plug;
+       if (!plug)
+               goto out;
+
+       if (q->mq_ops)
+               plug_list = &plug->mq_list;
+       else
+               plug_list = &plug->list;
+
+       list_for_each_entry(rq, plug_list, queuelist) {
+               if (rq->q == q)
+                       ret++;
+       }
+out:
+       return ret;
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
        req->cmd_type = REQ_TYPE_FS;
@@ -1583,7 +1700,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
        blk_rq_bio_prep(req->q, req, bio);
 }
 
-static void blk_queue_bio(struct request_queue *q, struct bio *bio)
+static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
        const bool sync = !!(bio->bi_rw & REQ_SYNC);
        struct blk_plug *plug;
@@ -1598,9 +1715,12 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
         */
        blk_queue_bounce(q, &bio);
 
+       blk_queue_split(q, &bio, q->bio_split);
+
        if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio_endio(bio, -EIO);
-               return;
+               bio->bi_error = -EIO;
+               bio_endio(bio);
+               return BLK_QC_T_NONE;
        }
 
        if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
@@ -1613,9 +1733,11 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
         * Check if we can merge with the plugged list before grabbing
         * any locks.
         */
-       if (!blk_queue_nomerges(q) &&
-           blk_attempt_plug_merge(q, bio, &request_count))
-               return;
+       if (!blk_queue_nomerges(q)) {
+               if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
+                       return BLK_QC_T_NONE;
+       } else
+               request_count = blk_plug_queued_count(q);
 
        spin_lock_irq(q->queue_lock);
 
@@ -1652,7 +1774,8 @@ get_rq:
         */
        req = get_request(q, rw_flags, bio, GFP_NOIO);
        if (IS_ERR(req)) {
-               bio_endio(bio, PTR_ERR(req));   /* @q is dead */
+               bio->bi_error = PTR_ERR(req);
+               bio_endio(bio);
                goto out_unlock;
        }
 
@@ -1690,6 +1813,8 @@ get_rq:
 out_unlock:
                spin_unlock_irq(q->queue_lock);
        }
+
+       return BLK_QC_T_NONE;
 }
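
Since ->make_request_fn now returns a cookie, a bio-based driver without polling support follows the same pattern and simply returns BLK_QC_T_NONE; a hypothetical sketch:

	static blk_qc_t example_null_make_request(struct request_queue *q,
						  struct bio *bio)
	{
		bio_endio(bio);			/* complete immediately, null-device style */
		return BLK_QC_T_NONE;		/* nothing for blk_poll() to spin on */
	}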
 
 /*
@@ -1721,8 +1846,6 @@ static void handle_bad_sector(struct bio *bio)
                        bio->bi_rw,
                        (unsigned long long)bio_end_sector(bio),
                        (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
-
-       set_bit(BIO_EOF, &bio->bi_flags);
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -1813,15 +1936,6 @@ generic_make_request_checks(struct bio *bio)
                goto end_io;
        }
 
-       if (likely(bio_is_rw(bio) &&
-                  nr_sectors > queue_max_hw_sectors(q))) {
-               printk(KERN_ERR "bio too big device %s (%u > %u)\n",
-                      bdevname(bio->bi_bdev, b),
-                      bio_sectors(bio),
-                      queue_max_hw_sectors(q));
-               goto end_io;
-       }
-
        part = bio->bi_bdev->bd_part;
        if (should_fail_request(part, bio->bi_iter.bi_size) ||
            should_fail_request(&part_to_disk(part)->part0,
@@ -1870,14 +1984,15 @@ generic_make_request_checks(struct bio *bio)
         */
        create_io_context(GFP_ATOMIC, q->node);
 
-       if (blk_throtl_bio(q, bio))
-               return false;   /* throttled, will be resubmitted later */
+       if (!blkcg_bio_issue_check(q, bio))
+               return false;
 
        trace_block_bio_queue(q, bio);
        return true;
 
 end_io:
-       bio_endio(bio, err);
+       bio->bi_error = err;
+       bio_endio(bio);
        return false;
 }
 
@@ -1905,12 +2020,13 @@ end_io:
  * a lower device by calling into generic_make_request recursively, which
  * means the bio should NOT be touched after the call to ->make_request_fn.
  */
-void generic_make_request(struct bio *bio)
+blk_qc_t generic_make_request(struct bio *bio)
 {
        struct bio_list bio_list_on_stack;
+       blk_qc_t ret = BLK_QC_T_NONE;
 
        if (!generic_make_request_checks(bio))
-               return;
+               goto out;
 
        /*
         * We only want one ->make_request_fn to be active at a time, else
@@ -1924,7 +2040,7 @@ void generic_make_request(struct bio *bio)
         */
        if (current->bio_list) {
                bio_list_add(current->bio_list, bio);
-               return;
+               goto out;
        }
 
        /* following loop may be a bit non-obvious, and so deserves some
@@ -1947,11 +2063,24 @@ void generic_make_request(struct bio *bio)
        do {
                struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-               q->make_request_fn(q, bio);
+               if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
+
+                       ret = q->make_request_fn(q, bio);
 
-               bio = bio_list_pop(current->bio_list);
+                       blk_queue_exit(q);
+
+                       bio = bio_list_pop(current->bio_list);
+               } else {
+                       struct bio *bio_next = bio_list_pop(current->bio_list);
+
+                       bio_io_error(bio);
+                       bio = bio_next;
+               }
        } while (bio);
        current->bio_list = NULL; /* deactivate */
+
+out:
+       return ret;
 }
 EXPORT_SYMBOL(generic_make_request);
 
@@ -1965,7 +2094,7 @@ EXPORT_SYMBOL(generic_make_request);
  * interfaces; @bio must be presetup and ready for I/O.
  *
  */
-void submit_bio(int rw, struct bio *bio)
+blk_qc_t submit_bio(int rw, struct bio *bio)
 {
        bio->bi_rw |= rw;
 
@@ -1999,12 +2128,13 @@ void submit_bio(int rw, struct bio *bio)
                }
        }
 
-       generic_make_request(bio);
+       return generic_make_request(bio);
 }
 EXPORT_SYMBOL(submit_bio);
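
Because submit_bio() now propagates the cookie from generic_make_request(), callers can keep it for later polling. A minimal sketch (hypothetical helper; completion handling elided):

	static blk_qc_t example_read_page(struct block_device *bdev,
					  struct page *page, sector_t sector)
	{
		struct bio *bio = bio_alloc(GFP_NOIO, 1);

		bio->bi_bdev = bdev;
		bio->bi_iter.bi_sector = sector;
		bio_add_page(bio, page, PAGE_SIZE, 0);

		return submit_bio(READ, bio);	/* cookie encodes hw queue and tag */
	}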
 
 /**
- * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * blk_cloned_rq_check_limits - Helper function to check a cloned request
+ *                              for new the queue limits
  * @q:  the queue
  * @rq: the request being checked
  *
@@ -2015,20 +2145,13 @@ EXPORT_SYMBOL(submit_bio);
  *    after it is inserted to @q, it should be checked against @q before
  *    the insertion using this generic function.
  *
- *    This function should also be useful for request stacking drivers
- *    in some cases below, so export this function.
  *    Request stacking drivers like request-based dm may change the queue
- *    limits while requests are in the queue (e.g. dm's table swapping).
- *    Such request stacking drivers should check those requests against
- *    the new queue limits again when they dispatch those requests,
- *    although such checkings are also done against the old queue limits
- *    when submitting requests.
+ *    limits when retrying requests on other queues. Those requests need
+ *    to be checked against the new queue limits again during dispatch.
  */
-int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+static int blk_cloned_rq_check_limits(struct request_queue *q,
+                                     struct request *rq)
 {
-       if (!rq_mergeable(rq))
-               return 0;
-
        if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
                printk(KERN_ERR "%s: over max size limit.\n", __func__);
                return -EIO;
@@ -2048,7 +2171,6 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(blk_rq_check_limits);
 
 /**
  * blk_insert_cloned_request - Helper for stacking drivers to submit a request
@@ -2060,7 +2182,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
        unsigned long flags;
        int where = ELEVATOR_INSERT_BACK;
 
-       if (blk_rq_check_limits(q, rq))
+       if (blk_cloned_rq_check_limits(q, rq))
                return -EIO;
 
        if (rq->rq_disk &&
@@ -3037,21 +3159,20 @@ void blk_start_plug(struct blk_plug *plug)
 {
        struct task_struct *tsk = current;
 
+       /*
+        * If this is a nested plug, don't actually assign it.
+        */
+       if (tsk->plug)
+               return;
+
        INIT_LIST_HEAD(&plug->list);
        INIT_LIST_HEAD(&plug->mq_list);
        INIT_LIST_HEAD(&plug->cb_list);
-
        /*
-        * If this is a nested plug, don't actually assign it. It will be
-        * flushed on its own.
+        * Store ordering should not be needed here, since a potential
+        * preempt will imply a full memory barrier
         */
-       if (!tsk->plug) {
-               /*
-                * Store ordering should not be needed here, since a potential
-                * preempt will imply a full memory barrier
-                */
-               tsk->plug = plug;
-       }
+       tsk->plug = plug;
 }
 EXPORT_SYMBOL(blk_start_plug);
 
@@ -3190,13 +3311,55 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
 void blk_finish_plug(struct blk_plug *plug)
 {
+       if (plug != current->plug)
+               return;
        blk_flush_plug_list(plug, false);
 
-       if (plug == current->plug)
-               current->plug = NULL;
+       current->plug = NULL;
 }
 EXPORT_SYMBOL(blk_finish_plug);
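
The plug API keeps its usual usage pattern; nested plugs are simply ignored up front now. For reference, the canonical pattern (hypothetical submitter, not part of this patch):

	static void example_batched_submit(struct bio **bios, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* a nested call is now a no-op */
		for (i = 0; i < nr; i++)
			generic_make_request(bios[i]);
		blk_finish_plug(&plug);		/* only the outermost plug flushes */
	}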
 
+bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+{
+       struct blk_plug *plug;
+       long state;
+
+       if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) ||
+           !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+               return false;
+
+       plug = current->plug;
+       if (plug)
+               blk_flush_plug_list(plug, false);
+
+       state = current->state;
+       while (!need_resched()) {
+               unsigned int queue_num = blk_qc_t_to_queue_num(cookie);
+               struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[queue_num];
+               int ret;
+
+               hctx->poll_invoked++;
+
+               ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie));
+               if (ret > 0) {
+                       hctx->poll_success++;
+                       set_current_state(TASK_RUNNING);
+                       return true;
+               }
+
+               if (signal_pending_state(state, current))
+                       set_current_state(TASK_RUNNING);
+
+               if (current->state == TASK_RUNNING)
+                       return true;
+               if (ret < 0)
+                       break;
+               cpu_relax();
+       }
+
+       return false;
+}
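
A caller sketch for blk_poll(), modelled on the polled direct-I/O wait loop (hypothetical names; assumes the bio completion handler sets *done):

	static void example_wait_for_cookie(struct request_queue *q, blk_qc_t cookie,
					    bool *done)
	{
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (READ_ONCE(*done))
				break;
			if (!blk_poll(q, cookie))	/* no poll support or need_resched */
				io_schedule();		/* fall back to sleeping */
		}
		__set_current_state(TASK_RUNNING);
	}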
+
 #ifdef CONFIG_PM
 /**
  * blk_pm_runtime_init - Block layer runtime PM initialization routine
@@ -3253,6 +3416,9 @@ int blk_pre_runtime_suspend(struct request_queue *q)
 {
        int ret = 0;
 
+       if (!q->dev)
+               return ret;
+
        spin_lock_irq(q->queue_lock);
        if (q->nr_pending) {
                ret = -EBUSY;
@@ -3280,6 +3446,9 @@ EXPORT_SYMBOL(blk_pre_runtime_suspend);
  */
 void blk_post_runtime_suspend(struct request_queue *q, int err)
 {
+       if (!q->dev)
+               return;
+
        spin_lock_irq(q->queue_lock);
        if (!err) {
                q->rpm_status = RPM_SUSPENDED;
@@ -3304,6 +3473,9 @@ EXPORT_SYMBOL(blk_post_runtime_suspend);
  */
 void blk_pre_runtime_resume(struct request_queue *q)
 {
+       if (!q->dev)
+               return;
+
        spin_lock_irq(q->queue_lock);
        q->rpm_status = RPM_RESUMING;
        spin_unlock_irq(q->queue_lock);
@@ -3326,6 +3498,9 @@ EXPORT_SYMBOL(blk_pre_runtime_resume);
  */
 void blk_post_runtime_resume(struct request_queue *q, int err)
 {
+       if (!q->dev)
+               return;
+
        spin_lock_irq(q->queue_lock);
        if (!err) {
                q->rpm_status = RPM_ACTIVE;
@@ -3343,7 +3518,7 @@ EXPORT_SYMBOL(blk_post_runtime_resume);
 int __init blk_dev_init(void)
 {
        BUILD_BUG_ON(__REQ_NR_BITS > 8 *
-                       sizeof(((struct request *)0)->cmd_flags));
+                       FIELD_SIZEOF(struct request, cmd_flags));
 
        /* used for unplugging and affects IO latency/throughput - HIGHPRI */
        kblockd_workqueue = alloc_workqueue("kblockd",