/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 * Adaptive Spinlocks:
 * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
 *				     and Peter Morreale,
 * Adaptive Spinlocks simplification:
 * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
 *
 * See Documentation/locking/rt-mutex-design.txt for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include <linux/ww_mutex.h>

#include "rtmutex_common.h"
/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set the bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
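
/*
 * Illustrative sketch (not part of this file): decoding the two pieces of
 * state packed into lock->owner as described above. rt_mutex_owner() in
 * rtmutex_common.h does the equivalent masking; the helper name here is
 * made up.
 */
#if 0	/* example only */
static inline struct task_struct *
example_decode_owner(struct rt_mutex *lock, bool *has_waiters)
{
	unsigned long val = (unsigned long)READ_ONCE(lock->owner);

	*has_waiters = val & RT_MUTEX_HAS_WAITERS;	/* bit 0 */
	return (struct task_struct *)(val & ~RT_MUTEX_HAS_WAITERS);
}
#endif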

static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	lock->owner = (struct task_struct *)val;
}

static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		clear_rt_mutex_waiters(lock);
}

static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
{
	return waiter && waiter != PI_WAKEUP_INPROGRESS &&
		waiter != PI_REQUEUE_INPROGRESS;
}

/*
 * We can speed up the acquire/release, if there's no debugging state to be
 * set up.
 */
#ifndef CONFIG_DEBUG_RT_MUTEXES
# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)

/*
 * Callers must hold the ->wait_lock -- which is the whole purpose as we force
 * all future threads that attempt to [Rmw] the lock to the slowpath. As such
 * relaxed semantics suffice.
 */
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg_relaxed(p, owner,
				 owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
					unsigned long flags)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg_release(lock, owner, NULL);
}

#else
# define rt_mutex_cmpxchg_relaxed(l,c,n)	(0)
# define rt_mutex_cmpxchg_acquire(l,c,n)	(0)
# define rt_mutex_cmpxchg_release(l,c,n)	(0)

static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
					unsigned long flags)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	return true;
}
#endif

static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
		     struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return dl_time_before(left->task->dl.deadline,
				      right->task->dl.deadline);

	return 0;
}
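
/*
 * Worked example (illustrative): a SCHED_FIFO prio 90 task runs with
 * ->prio == 9 and a SCHED_FIFO prio 80 task with ->prio == 19, so the
 * former orders first (lower ->prio wins). Two SCHED_DEADLINE waiters
 * compare equal on ->prio and fall through to the earliest-deadline
 * check above.
 */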

static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &lock->waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		lock->waiters_leftmost = &waiter->tree_entry;

	rb_link_node(&waiter->tree_entry, parent, link);
	rb_insert_color(&waiter->tree_entry, &lock->waiters);
}

static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	if (lock->waiters_leftmost == &waiter->tree_entry)
		lock->waiters_leftmost = rb_next(&waiter->tree_entry);

	rb_erase(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}

static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &task->pi_waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		task->pi_waiters_leftmost = &waiter->pi_tree_entry;

	rb_link_node(&waiter->pi_tree_entry, parent, link);
	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}

static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
		task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);

	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

/*
 * Calculate task priority from the waiter tree priority
 *
 * Return task->normal_prio when the waiter tree is empty or when
 * the waiter is not allowed to do priority boosting
 */
int rt_mutex_getprio(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return task->normal_prio;

	return min(task_top_pi_waiter(task)->prio,
		   task->normal_prio);
}

struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return NULL;

	return task_top_pi_waiter(task)->task;
}

/*
 * Called by sched_setscheduler() to get the priority which will be
 * effective after the change.
 */
int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
{
	if (!task_has_pi_waiters(task))
		return newprio;

	if (task_top_pi_waiter(task)->task->prio <= newprio)
		return task_top_pi_waiter(task)->task->prio;
	return newprio;
}
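
/*
 * Worked example (illustrative): if sched_setscheduler() is about to set
 * ->prio to 15 but the top pi waiter runs at ->prio 5, the function above
 * returns 5; a priority change is never allowed to undo an active boost.
 */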

/*
 * Adjust the priority of a task, after its pi_waiters got modified.
 *
 * This can be both boosting and unboosting. task->pi_lock must be held.
 */
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	if (task->prio != prio || dl_prio(prio))
		rt_mutex_setprio(task, prio);
}

/*
 * Adjust task priority (undo boosting). Called from the exit path of
 * rt_mutex_slowunlock() and rt_mutex_slowlock().
 *
 * (Note: We do this outside of the protection of lock->wait_lock to
 * allow the lock to be taken while or before we readjust the priority
 * of task. We do not use the spin_xx_mutex() variants here as we are
 * outside of the debug path.)
 */
void rt_mutex_adjust_prio(struct task_struct *task)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}

/*
 * Deadlock detection is conditional:
 *
 * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
 * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
 *
 * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
 * conducted independent of the detect argument.
 *
 * If the waiter argument is NULL this indicates the deboost path and
 * deadlock detection is disabled independent of the detect argument
 * and the config settings.
 */
static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
					  enum rtmutex_chainwalk chwalk)
{
	/*
	 * This is just a wrapper function for the following call,
	 * because debug_rt_mutex_detect_deadlock() smells like a magic
	 * debug feature and I wanted to keep the cond function in the
	 * main source file along with the comments instead of having
	 * two of the same in the headers.
	 */
	return debug_rt_mutex_detect_deadlock(waiter, chwalk);
}

static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
{
	if (waiter->savestate)
		wake_up_lock_sleeper(waiter->task);
	else
		wake_up_process(waiter->task);
}

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;
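
/*
 * Tuning note (assumption: the max_lock_depth sysctl wired up in
 * kernel/sysctl.c is enabled in this tree): the chain-walk limit can be
 * adjusted at runtime, e.g.:
 *
 *	# cat /proc/sys/kernel/max_lock_depth
 *	1024
 *	# echo 2048 > /proc/sys/kernel/max_lock_depth
 */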

static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
	return rt_mutex_real_waiter(p->pi_blocked_on) ?
		p->pi_blocked_on->lock : NULL;
}

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @chwalk:	do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter is gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 *	again:
 *	  loop_sanity_check();
 *	retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      enum rtmutex_chainwalk chwalk,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex *next_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int ret = 0, depth = 0;
	struct rt_mutex *lock;
	bool detect_deadlock;
	bool requeue = true;

	detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);

	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}

	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irq(&task->pi_lock);

	/*
	 * [2] Get the waiter on which @task is blocked on.
	 */
	waiter = task->pi_blocked_on;

	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!rt_mutex_real_waiter(waiter))
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @task itself.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task. If deadlock
		 * detection is enabled we continue, but stop the
		 * requeueing in the chain walk.
		 */
		if (top_waiter != task_top_pi_waiter(task)) {
			if (!detect_deadlock)
				goto out_unlock_pi;
			else
				requeue = false;
		}
	}

	/*
	 * If the waiter priority is the same as the task priority
	 * then there is no further priority adjustment necessary. If
	 * deadlock detection is off, we stop the chain walk. If its
	 * enabled we continue, but stop the requeueing in the chain
	 * walk.
	 */
	if (waiter->prio == task->prio) {
		if (!detect_deadlock)
			goto out_unlock_pi;
		else
			requeue = false;
	}

	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;
	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irq(&task->pi_lock);
		cpu_relax();
		goto retry;
	}

	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}

	/*
	 * If we just follow the lock chain for deadlock detection, no
	 * need to do all the requeue operations. To avoid a truckload
	 * of conditionals around the various places below, just do the
	 * minimum chain walk checks.
	 */
	if (!requeue) {
		/*
		 * No requeue[7] here. Just release @task [8]
		 */
		raw_spin_unlock(&task->pi_lock);
		put_task_struct(task);

		/*
		 * [9] check_exit_conditions_3 protected by lock->wait_lock.
		 * If there is no owner of the lock, end of chain.
		 */
		if (!rt_mutex_owner(lock)) {
			raw_spin_unlock_irq(&lock->wait_lock);
			return 0;
		}

		/* [10] Grab the next task, i.e. owner of @lock */
		task = rt_mutex_owner(lock);
		get_task_struct(task);
		raw_spin_lock(&task->pi_lock);

		/*
		 * No requeue [11] here. We just do deadlock detection.
		 *
		 * [12] Store whether owner is blocked
		 * itself. Decision is made after dropping the locks
		 */
		next_lock = task_blocked_on_lock(task);
		/*
		 * Get the top waiter for the next iteration
		 */
		top_waiter = rt_mutex_top_waiter(lock);

		/* [13] Drop locks */
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);

		/* If owner is not blocked, end of chain. */
		if (!next_lock)
			goto out_put_task;
		goto again;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);

	/* [7] Requeue the waiter in the lock waiter tree. */
	rt_mutex_dequeue(lock, waiter);
	waiter->prio = task->prio;
	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock(&task->pi_lock);
	put_task_struct(task);

	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the dead lock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		struct rt_mutex_waiter *lock_top_waiter;

		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		lock_top_waiter = rt_mutex_top_waiter(lock);
		if (prerequeue_top_waiter != lock_top_waiter)
			rt_mutex_wake_waiter(lock_top_waiter);
		raw_spin_unlock_irq(&lock->wait_lock);
		return 0;
	}

	/* [10] Grab the next task, i.e. the owner of @lock */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	raw_spin_lock(&task->pi_lock);

	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner tasks pi waiters tree with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner tasks pi waiters tree with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}

	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);

	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irq(&task->pi_lock);
 out_put_task:
	put_task_struct(task);

	return ret;
}

#define STEAL_NORMAL  0
#define STEAL_LATERAL 1

/*
 * Note that RT tasks are excluded from lateral-steals to prevent the
 * introduction of an unbounded latency.
 */
static inline int lock_is_stealable(struct task_struct *task,
				    struct task_struct *pendowner, int mode)
{
	if (mode == STEAL_NORMAL || rt_task(task)) {
		if (task->prio >= pendowner->prio)
			return 0;
	} else if (task->prio > pendowner->prio)
		return 0;
	return 1;
}
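
/*
 * Illustrative summary of the policy above: with STEAL_NORMAL a task must
 * have a strictly higher priority (numerically lower ->prio) than the
 * pending owner to steal the lock. STEAL_LATERAL additionally lets an
 * equal-priority task steal, but only if it is not an RT task, so RT
 * wakeup latency stays bounded.
 */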

/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait tree if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int __try_to_take_rt_mutex(struct rt_mutex *lock,
				  struct task_struct *task,
				  struct rt_mutex_waiter *waiter, int mode)
{
	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter tree. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		/*
		 * If waiter is not the highest priority waiter of
		 * @lock, give up.
		 */
		if (waiter != rt_mutex_top_waiter(lock)) {
			/* XXX lock_is_stealable() ? */
			return 0;
		}

		/*
		 * We can acquire the lock. Remove the waiter from the
		 * lock waiters tree.
		 */
		rt_mutex_dequeue(lock, waiter);

	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			struct task_struct *pown = rt_mutex_top_waiter(lock)->task;

			if (task != pown && !lock_is_stealable(task, pown, mode))
				return 0;
			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters tree.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock(&task->pi_lock);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters tree.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock(&task->pi_lock);

takeit:
	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	rt_mutex_deadlock_account_lock(lock, task);

	return 1;
}

#ifdef CONFIG_PREEMPT_RT_FULL
/*
 * preemptible spin_lock functions:
 */
static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
					 void (*slowfn)(struct rt_mutex *lock,
							bool mg_off),
					 bool do_mig_dis)
{
	might_sleep_no_state_check();

	if (do_mig_dis)
		migrate_disable();

	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
		rt_mutex_deadlock_account_lock(lock, current);
	else
		slowfn(lock, do_mig_dis);
}

static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
					   void (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
		rt_mutex_deadlock_account_unlock(current);
	else
		slowfn(lock);
}

#ifdef CONFIG_SMP
/*
 * Note that owner is a speculative pointer and dereferencing relies
 * on rcu_read_lock() and the check against the lock owner.
 */
static int adaptive_wait(struct rt_mutex *lock,
			 struct task_struct *owner)
{
	int res = 0;

	rcu_read_lock();
	for (;;) {
		if (owner != rt_mutex_owner(lock))
			break;
		/*
		 * Ensure that owner->on_cpu is dereferenced _after_
		 * checking the above to be valid.
		 */
		barrier();
		if (!owner->on_cpu) {
			res = 1;
			break;
		}
		cpu_relax();
	}
	rcu_read_unlock();
	return res;
}
#else
static int adaptive_wait(struct rt_mutex *lock,
			 struct task_struct *orig_owner)
{
	return 1;
}
#endif

static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   enum rtmutex_chainwalk chwalk);

/*
 * Slow path lock function spin_lock style: this variant is very
 * careful not to miss any non-lock wakeups.
 *
 * We store the current state under p->pi_lock in p->saved_state and
 * the try_to_wake_up() code handles this accordingly.
 */
static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
						   bool mg_off)
{
	struct task_struct *lock_owner, *self = current;
	struct rt_mutex_waiter waiter, *top_waiter;
	unsigned long flags;
	int ret;

	rt_mutex_init_waiter(&waiter, true);

	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
		return;
	}

	BUG_ON(rt_mutex_owner(lock) == self);

	/*
	 * We save whatever state the task is in and we'll restore it
	 * after acquiring the lock taking real wakeups into account
	 * as well. We are serialized via pi_lock against wakeups. See
	 * try_to_wake_up().
	 */
	raw_spin_lock(&self->pi_lock);
	self->saved_state = self->state;
	__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
	raw_spin_unlock(&self->pi_lock);

	ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
	BUG_ON(ret);

	for (;;) {
		/* Try to acquire the lock again. */
		if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
			break;

		top_waiter = rt_mutex_top_waiter(lock);
		lock_owner = rt_mutex_owner(lock);

		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

		debug_rt_mutex_print_deadlock(&waiter);

		if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
			if (mg_off)
				migrate_enable();
			schedule();
			if (mg_off)
				migrate_disable();
		}

		raw_spin_lock_irqsave(&lock->wait_lock, flags);

		raw_spin_lock(&self->pi_lock);
		__set_current_state_no_track(TASK_UNINTERRUPTIBLE);
		raw_spin_unlock(&self->pi_lock);
	}

	/*
	 * Restore the task state to current->saved_state. We set it
	 * to the original state above and the try_to_wake_up() code
	 * has possibly updated it when a real (non-rtmutex) wakeup
	 * happened while we were blocked. Clear saved_state so
	 * try_to_wakeup() does not get confused.
	 */
	raw_spin_lock(&self->pi_lock);
	__set_current_state_no_track(self->saved_state);
	self->saved_state = TASK_RUNNING;
	raw_spin_unlock(&self->pi_lock);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up:
	 */
	fixup_rt_mutex_waiters(lock);

	BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
	BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	debug_rt_mutex_free_waiter(&waiter);
}

static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
				    struct wake_q_head *wake_sleeper_q,
				    struct rt_mutex *lock);

/*
 * Slow path to release a rt_mutex spin_lock style
 */
static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
{
	unsigned long flags;
	WAKE_Q(wake_q);
	WAKE_Q(wake_sleeper_q);

	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	if (!rt_mutex_has_waiters(lock)) {
		lock->owner = NULL;
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
		return;
	}

	mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	wake_up_q(&wake_q);
	wake_up_q_sleeper(&wake_sleeper_q);

	/* Undo pi boosting when necessary */
	rt_mutex_adjust_prio(current);
}

void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
{
	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(rt_spin_lock__no_mg);

void __lockfunc rt_spin_lock(spinlock_t *lock)
{
	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(rt_spin_lock);

void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
{
	rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
}
EXPORT_SYMBOL(__rt_spin_lock);

void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
{
	rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
}
EXPORT_SYMBOL(__rt_spin_lock__no_mg);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
{
	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
	rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
}
EXPORT_SYMBOL(rt_spin_lock_nested);
#endif

void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
{
	/* NOTE: we always pass in '1' for nested, for simplicity */
	spin_release(&lock->dep_map, 1, _RET_IP_);
	rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
}
EXPORT_SYMBOL(rt_spin_unlock__no_mg);

void __lockfunc rt_spin_unlock(spinlock_t *lock)
{
	/* NOTE: we always pass in '1' for nested, for simplicity */
	spin_release(&lock->dep_map, 1, _RET_IP_);
	rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
	migrate_enable();
}
EXPORT_SYMBOL(rt_spin_unlock);

void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
{
	rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
}
EXPORT_SYMBOL(__rt_spin_unlock);
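
/*
 * Minimal usage sketch (illustrative, not part of this file): under
 * PREEMPT_RT_FULL a spinlock_t is backed by the rtmutex code above, so the
 * critical section below may sleep under contention and must be entered
 * from preemptible context. The lock and function names are made up.
 */
#if 0	/* example only */
static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_list);

static void example_add(struct list_head *node)
{
	spin_lock(&example_lock);	/* may block and PI-boost the owner */
	list_add(node, &example_list);
	spin_unlock(&example_lock);	/* wakes the top waiter, deboosts */
}
#endif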

/*
 * Wait for the lock to get unlocked: instead of polling for an unlock
 * (like raw spinlocks do), we lock and unlock, to force the kernel to
 * schedule if there's contention:
 */
void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
{
	spin_lock(lock);
	spin_unlock(lock);
}
EXPORT_SYMBOL(rt_spin_unlock_wait);

int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
{
	return rt_mutex_trylock(lock);
}

int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
{
	int ret;

	ret = rt_mutex_trylock(&lock->lock);
	if (ret)
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock__no_mg);

int __lockfunc rt_spin_trylock(spinlock_t *lock)
{
	int ret;

	migrate_disable();
	ret = rt_mutex_trylock(&lock->lock);
	if (ret)
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	else
		migrate_enable();
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock);

int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
{
	int ret;

	local_bh_disable();
	ret = rt_mutex_trylock(&lock->lock);
	if (ret) {
		migrate_disable();
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	} else
		local_bh_enable();
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock_bh);

int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
{
	int ret;

	*flags = 0;
	ret = rt_mutex_trylock(&lock->lock);
	if (ret) {
		migrate_disable();
		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
	}
	return ret;
}
EXPORT_SYMBOL(rt_spin_trylock_irqsave);

int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
{
	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
	if (atomic_add_unless(atomic, -1, 1))
		return 0;
	rt_spin_lock(lock);
	if (atomic_dec_and_test(atomic))
		return 1;
	rt_spin_unlock(lock);
	return 0;
}
EXPORT_SYMBOL(atomic_dec_and_spin_lock);
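
/*
 * Usage sketch (illustrative, not part of this file): the classic
 * refcount-drop pattern this helper supports. The object type and names
 * are made up; on the final put the lock is returned held and the caller
 * must unlock it.
 */
#if 0	/* example only */
static void example_put(struct example_obj *obj)
{
	/* Returns 1 with obj->lock held only when the count hit zero. */
	if (atomic_dec_and_spin_lock(&obj->refcnt, &obj->lock)) {
		list_del(&obj->node);
		spin_unlock(&obj->lock);
		kfree(obj);
	}
}
#endif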

void
__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
	lockdep_init_map(&lock->dep_map, name, key, 0);
#endif
}
EXPORT_SYMBOL(__rt_spin_lock_init);

#endif /* PREEMPT_RT_FULL */

#ifdef CONFIG_PREEMPT_RT_FULL
static inline int __sched
__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
{
	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
	struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);

	if (!hold_ctx)
		return 0;

	if (unlikely(ctx == hold_ctx))
		return -EALREADY;

	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
#ifdef CONFIG_DEBUG_MUTEXES
		DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
		ctx->contending_lock = ww;
#endif
		return -EDEADLK;
	}

	return 0;
}
#else
static inline int __sched
__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
{
	BUG();
	return 0;
}
#endif

static inline int
try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
		     struct rt_mutex_waiter *waiter)
{
	return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held and interrupts disabled
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   enum rtmutex_chainwalk chwalk)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex *next_lock;
	int chain_walk = 0, res;

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);

	/*
	 * In the case of futex requeue PI, this will be a proxy
	 * lock. The task will wake unaware that it is enqueued on
	 * this lock. Avoid blocking on two locks and corrupting
	 * pi_blocked_on via the PI_WAKEUP_INPROGRESS
	 * flag. futex_wait_requeue_pi() sets this when it wakes up
	 * before requeue (due to a signal or timeout). Do not enqueue
	 * the task if PI_WAKEUP_INPROGRESS is set.
	 */
	if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
		raw_spin_unlock(&task->pi_lock);
		return -EAGAIN;
	}

	BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));

	__rt_mutex_adjust_prio(task);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock(&task->pi_lock);

	if (!owner)
		return 0;

	raw_spin_lock(&owner->pi_lock);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		__rt_mutex_adjust_prio(owner);
		if (rt_mutex_real_waiter(owner->pi_blocked_on))
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
					 next_lock, waiter, task);

	raw_spin_lock_irq(&lock->wait_lock);

	return res;
}

/*
 * Remove the top waiter from the current tasks pi waiter tree and
 * queue it up.
 *
 * Called with lock->wait_lock held and interrupts disabled.
 */
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
				    struct wake_q_head *wake_sleeper_q,
				    struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;

	raw_spin_lock(&current->pi_lock);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 */
	rt_mutex_dequeue_pi(current, waiter);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	raw_spin_unlock(&current->pi_lock);

	if (waiter->savestate)
		wake_q_add(wake_sleeper_q, waiter->task);
	else
		wake_q_add(wake_q, waiter->task);
}

/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and interrupts disabled. I must
 * have just failed to try_to_take_rt_mutex().
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock = NULL;

	raw_spin_lock(&current->pi_lock);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock(&current->pi_lock);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock(&owner->pi_lock);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	__rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	if (rt_mutex_real_waiter(owner->pi_blocked_on))
		next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock(&owner->pi_lock);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock_irq(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
				   next_lock, NULL, current);

	raw_spin_lock_irq(&lock->wait_lock);
}

/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
			!dl_prio(task->prio))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
				   next_lock, NULL, task);
}

/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:		 the rt_mutex to take
 * @state:		 the state the task should block in (TASK_INTERRUPTIBLE
 *			 or TASK_UNINTERRUPTIBLE)
 * @timeout:		 the pre-initialized and started timer, or NULL for none
 * @waiter:		 the pre-initialized rt_mutex_waiter
 *
 * Must be called with lock->wait_lock held and interrupts disabled
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter,
		    struct ww_acquire_ctx *ww_ctx)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		if (ww_ctx && ww_ctx->acquired > 0) {
			ret = __mutex_lock_check_stamp(lock, ww_ctx);
			if (ret)
				break;
		}

		raw_spin_unlock_irq(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);
	}

	__set_current_state(TASK_RUNNING);
	return ret;
}

static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}

static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
						   struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	/*
	 * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
	 * but released with a normal mutex_unlock in this call.
	 *
	 * This should never happen, always use ww_mutex_unlock.
	 */
	DEBUG_LOCKS_WARN_ON(ww->ctx);

	/*
	 * Not quite done after calling ww_acquire_done() ?
	 */
	DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);

	if (ww_ctx->contending_lock) {
		/*
		 * After -EDEADLK you tried to
		 * acquire a different ww_mutex? Bad!
		 */
		DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);

		/*
		 * You called ww_mutex_lock after receiving -EDEADLK,
		 * but 'forgot' to unlock everything else first?
		 */
		DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
		ww_ctx->contending_lock = NULL;
	}

	/*
	 * Naughty, using a different class will lead to undefined behavior!
	 */
	DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
#endif
	ww_ctx->acquired++;
}

#ifdef CONFIG_PREEMPT_RT_FULL
static void ww_mutex_account_lock(struct rt_mutex *lock,
				  struct ww_acquire_ctx *ww_ctx)
{
	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
	struct rt_mutex_waiter *waiter, *n;

	/*
	 * This branch gets optimized out for the common case,
	 * and is only important for ww_mutex_lock.
	 */
	ww_mutex_lock_acquired(ww, ww_ctx);
	ww->ctx = ww_ctx;

	/*
	 * Give any possible sleeping processes the chance to wake up,
	 * so they can recheck if they have to back off.
	 */
	rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
					     tree_entry) {
		/* XXX debug rt mutex waiter wakeup */

		BUG_ON(waiter->lock != lock);
		rt_mutex_wake_waiter(waiter);
	}
}

#else

static void ww_mutex_account_lock(struct rt_mutex *lock,
				  struct ww_acquire_ctx *ww_ctx)
{
	BUG();
}
#endif

/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  enum rtmutex_chainwalk chwalk,
		  struct ww_acquire_ctx *ww_ctx)
{
	struct rt_mutex_waiter waiter;
	unsigned long flags;
	int ret = 0;

	rt_mutex_init_waiter(&waiter, false);

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled
	 * (debug, no architecture support). In this case we will acquire the
	 * rtmutex with lock->wait_lock held. But we cannot unconditionally
	 * enable interrupts in that early boot case. So we need to use the
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (ww_ctx)
			ww_mutex_account_lock(lock, ww_ctx);
		raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout))
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);

	if (likely(!ret))
		/* sleep on the mutex */
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
					  ww_ctx);
	else if (ww_ctx) {
		/* ww_mutex received EDEADLK, let it become EALREADY */
		ret = __mutex_lock_check_stamp(lock, ww_ctx);
		BUG_ON(!ret);
	}

	if (unlikely(ret)) {
		__set_current_state(TASK_RUNNING);
		if (rt_mutex_has_waiters(lock))
			remove_waiter(lock, &waiter);
		/* ww_mutex want to report EDEADLK/EALREADY, let them */
		if (!ww_ctx)
			rt_mutex_handle_deadlock(ret, chwalk, &waiter);
	} else if (ww_ctx) {
		ww_mutex_account_lock(lock, ww_ctx);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}

/*
 * Slow path try-lock function:
 */
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	unsigned long flags;
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and try to
	 * acquire the lock. We use irqsave here to support early boot calls.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return ret;
}

/*
 * Slow path to release a rt-mutex.
 * Return whether the current task needs to undo a potential priority boosting.
 */
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
					struct wake_q_head *wake_q,
					struct wake_q_head *wake_sleeper_q)
{
	unsigned long flags;

	/* irqsave required to support early boot calls */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);

	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock);   <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			if (free)
	 *				kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock, flags) == true)
			return false;
		/* Relock the rtmutex and try again */
		raw_spin_lock_irqsave(&lock->wait_lock, flags);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 *
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	/* check PI boosting */
	return true;
}

/*
 * debug aware fast / slowpath lock,trylock,unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
		  struct ww_acquire_ctx *ww_ctx,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
				enum rtmutex_chainwalk chwalk,
				struct ww_acquire_ctx *ww_ctx))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	}
	return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
		      ww_ctx);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout,
			enum rtmutex_chainwalk chwalk,
			struct ww_acquire_ctx *ww_ctx,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
				      enum rtmutex_chainwalk chwalk,
				      struct ww_acquire_ctx *ww_ctx))
{
	if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
	    likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	}
	return slowfn(lock, state, timeout, chwalk, ww_ctx);
}

static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
		     int (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 1;
	}
	return slowfn(lock);
}

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
		    bool (*slowfn)(struct rt_mutex *lock,
				   struct wake_q_head *wqh,
				   struct wake_q_head *wq_sleeper))
{
	WAKE_Q(wake_q);
	WAKE_Q(wake_sleeper_q);

	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
		rt_mutex_deadlock_account_unlock(current);
	} else {
		bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q);

		wake_up_q(&wake_q);
		wake_up_q_sleeper(&wake_sleeper_q);

		/* Undo pi boosting if necessary: */
		if (deboost)
			rt_mutex_adjust_prio(current);
	}
}

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	might_sleep();

	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock:		the rt_mutex to be locked
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
	might_sleep();

	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/*
 * Futex variant with full deadlock detection.
 */
int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
			      struct hrtimer_sleeper *timeout)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       RT_MUTEX_FULL_CHAINWALK, NULL,
				       rt_mutex_slowlock);
}

/**
 * rt_mutex_lock_killable - lock a rt_mutex killable
 *
 * @lock:		the rt_mutex to be locked
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a fatal signal
 */
int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
{
	might_sleep();

	return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);

/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible
 *			 the timeout structure is provided
 *			 by the caller
 *
 * @lock:		the rt_mutex to be locked
 * @timeout:		timeout structure or NULL (no timeout)
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 * -ETIMEDOUT	when the timeout expired
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       RT_MUTEX_MIN_CHAINWALK,
				       NULL,
				       rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock:	the rt_mutex to be locked
 *
 * This function can only be called in thread context. It's safe to
 * call it from atomic regions, but not from hard interrupt or soft
 * interrupt context.
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
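
/*
 * Usage sketch (illustrative, not part of this file): the core
 * lock/trylock/unlock API from thread context. DEFINE_RT_MUTEX() comes
 * from <linux/rtmutex.h>; the surrounding names are made up.
 */
#if 0	/* example only */
static DEFINE_RT_MUTEX(example_mutex);

static void example_update(void)
{
	if (!rt_mutex_trylock(&example_mutex)) {
		/* Contended: block and PI-boost a lower priority owner. */
		rt_mutex_lock(&example_mutex);
	}
	/* ... critical section ... */
	rt_mutex_unlock(&example_mutex);
}
#endif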

/**
 * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
 * @lock: the rt_mutex to be unlocked
 *
 * Returns: true/false indicating whether priority adjustment is
 * needed.
 */
bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
				   struct wake_q_head *wqh,
				   struct wake_q_head *wq_sleeper)
{
	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
		rt_mutex_deadlock_account_unlock(current);
		return false;
	}
	return rt_mutex_slowunlock(lock, wqh, wq_sleeper);
}

/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}
EXPORT_SYMBOL_GPL(rt_mutex_destroy);

/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock: the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing of a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	lock->owner = NULL;
	raw_spin_lock_init(&lock->wait_lock);
	lock->waiters = RB_ROOT;
	lock->waiters_leftmost = NULL;

	debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL(__rt_mutex_init);

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock:	the rt_mutex to be locked
 * @proxy_owner:the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	rt_mutex_init(lock);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
	rt_mutex_deadlock_account_lock(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock:	the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
	rt_mutex_deadlock_account_unlock(proxy_owner);
}

/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for FUTEX_REQUEUE_PI support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL)) {
		raw_spin_unlock_irq(&lock->wait_lock);
		return 1;
	}

#ifdef CONFIG_PREEMPT_RT_FULL
	/*
	 * In PREEMPT_RT there's an added race.
	 * If the task, that we are about to requeue, times out,
	 * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
	 * to skip this task. But right after the task sets
	 * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
	 * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
	 * This will replace the PI_WAKEUP_INPROGRESS with the actual
	 * lock that it blocks on. We *must not* place this task
	 * on this proxy lock in that case.
	 *
	 * To prevent this race, we first take the task's pi_lock
	 * and check if it has updated its pi_blocked_on. If it has,
	 * we assume that it woke up and we return -EAGAIN.
	 * Otherwise, we set the task's pi_blocked_on to
	 * PI_REQUEUE_INPROGRESS, so that if the task is waking up
	 * it will know that we are in the process of requeuing it.
	 */
	raw_spin_lock(&task->pi_lock);
	if (task->pi_blocked_on) {
		raw_spin_unlock(&task->pi_lock);
		raw_spin_unlock_irq(&lock->wait_lock);
		return -EAGAIN;
	}
	task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
	raw_spin_unlock(&task->pi_lock);
#endif

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task,
				      RT_MUTEX_FULL_CHAINWALK);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain. Let the waiter sort it out.
		 */
		ret = 0;
	}

	if (ret && rt_mutex_has_waiters(lock))
		remove_waiter(lock, waiter);

	raw_spin_unlock_irq(&lock->wait_lock);

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}

/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, null if none. hrtimer should already have
 *			been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 *
 * Complete the lock acquisition started on our behalf by another thread.
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter)
{
	int ret;

	raw_spin_lock_irq(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	/* sleep on the mutex */
	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock_irq(&lock->wait_lock);

	return ret;
}

static inline int
ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
	unsigned tmp;

	if (ctx->deadlock_inject_countdown-- == 0) {
		tmp = ctx->deadlock_inject_interval;
		if (tmp > UINT_MAX/4)
			tmp = UINT_MAX;
		else
			tmp = tmp*2 + tmp + tmp/2;

		ctx->deadlock_inject_interval = tmp;
		ctx->deadlock_inject_countdown = tmp;
		ctx->contending_lock = lock;

		ww_mutex_unlock(lock);

		return -EDEADLK;
	}
#endif

	return 0;
}

#ifdef CONFIG_PREEMPT_RT_FULL
int __sched
__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	might_sleep();

	mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
	ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
	if (ret)
		mutex_release(&lock->base.dep_map, 1, _RET_IP_);
	else if (!ret && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(lock, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);

int __sched
__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	might_sleep();

	mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
	ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
	if (ret)
		mutex_release(&lock->base.dep_map, 1, _RET_IP_);
	else if (!ret && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(lock, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(__ww_mutex_lock);

void __sched ww_mutex_unlock(struct ww_mutex *lock)
{
	int nest = !!lock->ctx;

	/*
	 * The unlocking fastpath is the 0->1 transition from 'locked'
	 * into 'unlocked' state:
	 */
	if (nest) {
#ifdef CONFIG_DEBUG_MUTEXES
		DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
#endif
		if (lock->ctx->acquired > 0)
			lock->ctx->acquired--;
		lock->ctx = NULL;
	}

	mutex_release(&lock->base.dep_map, nest, _RET_IP_);
	rt_mutex_unlock(&lock->base.lock);
}
EXPORT_SYMBOL(ww_mutex_unlock);
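
/*
 * Usage sketch (illustrative, not part of this file): the wound/wait
 * acquire pattern served by the functions above. ww_acquire_init(),
 * ww_mutex_lock(), ww_acquire_done() and ww_acquire_fini() are the
 * generic API from <linux/ww_mutex.h>; the lock names are made up and
 * the ww_mutexes are assumed to be ww_mutex_init()'d elsewhere.
 */
#if 0	/* example only */
static DEFINE_WW_CLASS(example_ww_class);
static struct ww_mutex example_a, example_b;

static int example_lock_both(void)
{
	struct ww_acquire_ctx ctx;
	int ret;

	ww_acquire_init(&ctx, &example_ww_class);
	ret = ww_mutex_lock(&example_a, &ctx);
	if (ret)
		goto out;	/* only -EALREADY is possible here */
	ret = ww_mutex_lock(&example_b, &ctx);
	if (ret == -EDEADLK) {
		/* Wounded: back off fully before retrying. */
		ww_mutex_unlock(&example_a);
		goto out;
	}
	ww_acquire_done(&ctx);
	/* ... both locks held ... */
	ww_mutex_unlock(&example_b);
	ww_mutex_unlock(&example_a);
out:
	ww_acquire_fini(&ctx);
	return ret;
}
#endif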