These changes are the raw update to the linux-4.4.6-rt14 kernel sources.
[kvmfornfv.git] / kernel / kernel / locking / rtmutex.c
1 /*
2  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
3  *
4  * started by Ingo Molnar and Thomas Gleixner.
5  *
6  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9  *  Copyright (C) 2006 Esben Nielsen
10  *  Adaptive Spinlocks:
11  *  Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
12  *                                   and Peter Morreale,
13  * Adaptive Spinlocks simplification:
14  *  Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
15  *
16  *  See Documentation/locking/rt-mutex-design.txt for details.
17  */
18 #include <linux/spinlock.h>
19 #include <linux/export.h>
20 #include <linux/sched.h>
21 #include <linux/sched/rt.h>
22 #include <linux/sched/deadline.h>
23 #include <linux/timer.h>
24 #include <linux/ww_mutex.h>
25
26 #include "rtmutex_common.h"
27
28 /*
29  * lock->owner state tracking:
30  *
31  * lock->owner holds the task_struct pointer of the owner. Bit 0
32  * is used to keep track of the "lock has waiters" state.
33  *
34  * owner        bit0
35  * NULL         0       lock is free (fast acquire possible)
36  * NULL         1       lock is free and has waiters and the top waiter
37  *                              is going to take the lock*
38  * taskpointer  0       lock is held (fast release possible)
39  * taskpointer  1       lock is held and has waiters**
40  *
41  * The fast atomic compare exchange based acquire and release is only
42  * possible when bit 0 of lock->owner is 0.
43  *
44  * (*) It also can be a transitional state when grabbing the lock
45  * while ->wait_lock is held. To prevent any fast path cmpxchg on the lock,
46  * we need to set bit0 before looking at the lock, and the owner may be
47  * NULL during this small window, hence this can be a transitional state.
48  *
49  * (**) There is a small window when bit 0 is set but there are no
50  * waiters. This can happen when grabbing the lock in the slow path.
51  * To prevent a cmpxchg of the owner releasing the lock, we need to
52  * set this bit before looking at the lock.
53  */
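/*
 * Editor's sketch (not part of the original source): the owner word packs
 * the task_struct pointer and the "has waiters" flag into a single value,
 * relying on task_struct alignment so that bit 0 is always free. A
 * hypothetical decode looks like:
 *
 *      unsigned long val = (unsigned long)READ_ONCE(lock->owner);
 *      struct task_struct *owner =
 *              (struct task_struct *)(val & ~RT_MUTEX_HAS_WAITERS);
 *      bool has_waiters = val & RT_MUTEX_HAS_WAITERS;
 *
 * which is essentially what the rt_mutex_owner() helper in
 * rtmutex_common.h does for the owner part.
 */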
54
55 static void
56 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
57 {
58         unsigned long val = (unsigned long)owner;
59
60         if (rt_mutex_has_waiters(lock))
61                 val |= RT_MUTEX_HAS_WAITERS;
62
63         lock->owner = (struct task_struct *)val;
64 }
65
66 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
67 {
68         lock->owner = (struct task_struct *)
69                         ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
70 }
71
72 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
73 {
74         if (!rt_mutex_has_waiters(lock))
75                 clear_rt_mutex_waiters(lock);
76 }
77
78 static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
79 {
80         return waiter && waiter != PI_WAKEUP_INPROGRESS &&
81                 waiter != PI_REQUEUE_INPROGRESS;
82 }
83
84 /*
85  * We can speed up the acquire/release, if there's no debugging state to be
86  * set up.
87  */
88 #ifndef CONFIG_DEBUG_RT_MUTEXES
89 # define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
90 # define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
91 # define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
92
93 /*
94  * Callers must hold the ->wait_lock -- which is the whole purpose as we force
95  * all future threads that attempt to [Rmw] the lock to the slowpath. As such
96  * relaxed semantics suffice.
97  */
98 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
99 {
100         unsigned long owner, *p = (unsigned long *) &lock->owner;
101
102         do {
103                 owner = *p;
104         } while (cmpxchg_relaxed(p, owner,
105                                  owner | RT_MUTEX_HAS_WAITERS) != owner);
106 }
107
108 /*
109  * Safe fastpath aware unlock:
110  * 1) Clear the waiters bit
111  * 2) Drop lock->wait_lock
112  * 3) Try to unlock the lock with cmpxchg
113  */
114 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
115                                         unsigned long flags)
116         __releases(lock->wait_lock)
117 {
118         struct task_struct *owner = rt_mutex_owner(lock);
119
120         clear_rt_mutex_waiters(lock);
121         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
122         /*
123          * If a new waiter comes in between the unlock and the cmpxchg
124          * we have two situations:
125          *
126          * unlock(wait_lock);
127          *                                      lock(wait_lock);
128          * cmpxchg(p, owner, 0) == owner
129          *                                      mark_rt_mutex_waiters(lock);
130          *                                      acquire(lock);
131          * or:
132          *
133          * unlock(wait_lock);
134          *                                      lock(wait_lock);
135          *                                      mark_rt_mutex_waiters(lock);
136          *
137          * cmpxchg(p, owner, 0) != owner
138          *                                      enqueue_waiter();
139          *                                      unlock(wait_lock);
140          * lock(wait_lock);
141          * wake waiter();
142          * unlock(wait_lock);
143          *                                      lock(wait_lock);
144          *                                      acquire(lock);
145          */
146         return rt_mutex_cmpxchg_release(lock, owner, NULL);
147 }
148
149 #else
150 # define rt_mutex_cmpxchg_relaxed(l,c,n)        (0)
151 # define rt_mutex_cmpxchg_acquire(l,c,n)        (0)
152 # define rt_mutex_cmpxchg_release(l,c,n)        (0)
153
154 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
155 {
156         lock->owner = (struct task_struct *)
157                         ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
158 }
159
160 /*
161  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
162  */
163 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
164                                         unsigned long flags)
165         __releases(lock->wait_lock)
166 {
167         lock->owner = NULL;
168         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
169         return true;
170 }
171 #endif
172
173 static inline int
174 rt_mutex_waiter_less(struct rt_mutex_waiter *left,
175                      struct rt_mutex_waiter *right)
176 {
177         if (left->prio < right->prio)
178                 return 1;
179
180         /*
181          * If both waiters have dl_prio(), we check the deadlines of the
182          * associated tasks.
183          * If left waiter has a dl_prio(), and we didn't return 1 above,
184          * then right waiter has a dl_prio() too.
185          */
186         if (dl_prio(left->prio))
187                 return dl_time_before(left->task->dl.deadline,
188                                       right->task->dl.deadline);
189
190         return 0;
191 }
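/*
 * Worked example (editor's addition): ->prio uses the kernel's inverted
 * scale, lower value == higher priority, so a waiter with ->prio 20
 * orders before one with ->prio 40 via the first check. If both waiters
 * carry a deadline priority (dl_prio() true, hence equal ->prio), the
 * tie is broken by the earlier absolute deadline:
 *
 *      left->task->dl.deadline  = 1000
 *      right->task->dl.deadline = 2000
 *              => rt_mutex_waiter_less(left, right) == 1
 */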
192
193 static void
194 rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
195 {
196         struct rb_node **link = &lock->waiters.rb_node;
197         struct rb_node *parent = NULL;
198         struct rt_mutex_waiter *entry;
199         int leftmost = 1;
200
201         while (*link) {
202                 parent = *link;
203                 entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
204                 if (rt_mutex_waiter_less(waiter, entry)) {
205                         link = &parent->rb_left;
206                 } else {
207                         link = &parent->rb_right;
208                         leftmost = 0;
209                 }
210         }
211
212         if (leftmost)
213                 lock->waiters_leftmost = &waiter->tree_entry;
214
215         rb_link_node(&waiter->tree_entry, parent, link);
216         rb_insert_color(&waiter->tree_entry, &lock->waiters);
217 }
218
219 static void
220 rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
221 {
222         if (RB_EMPTY_NODE(&waiter->tree_entry))
223                 return;
224
225         if (lock->waiters_leftmost == &waiter->tree_entry)
226                 lock->waiters_leftmost = rb_next(&waiter->tree_entry);
227
228         rb_erase(&waiter->tree_entry, &lock->waiters);
229         RB_CLEAR_NODE(&waiter->tree_entry);
230 }
231
232 static void
233 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
234 {
235         struct rb_node **link = &task->pi_waiters.rb_node;
236         struct rb_node *parent = NULL;
237         struct rt_mutex_waiter *entry;
238         int leftmost = 1;
239
240         while (*link) {
241                 parent = *link;
242                 entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
243                 if (rt_mutex_waiter_less(waiter, entry)) {
244                         link = &parent->rb_left;
245                 } else {
246                         link = &parent->rb_right;
247                         leftmost = 0;
248                 }
249         }
250
251         if (leftmost)
252                 task->pi_waiters_leftmost = &waiter->pi_tree_entry;
253
254         rb_link_node(&waiter->pi_tree_entry, parent, link);
255         rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
256 }
257
258 static void
259 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
260 {
261         if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
262                 return;
263
264         if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
265                 task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
266
267         rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
268         RB_CLEAR_NODE(&waiter->pi_tree_entry);
269 }
270
271 /*
272  * Calculate task priority from the waiter tree priority
273  *
274  * Return task->normal_prio when the waiter tree is empty or when
275  * the waiter is not allowed to do priority boosting
276  */
277 int rt_mutex_getprio(struct task_struct *task)
278 {
279         if (likely(!task_has_pi_waiters(task)))
280                 return task->normal_prio;
281
282         return min(task_top_pi_waiter(task)->prio,
283                    task->normal_prio);
284 }
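/*
 * Example (editor's addition): a SCHED_NORMAL owner with normal_prio 120
 * whose top pi waiter is a SCHED_FIFO task at prio 98 is reported as
 * prio 98 here (min() on the inverted scale). __rt_mutex_adjust_prio()
 * below feeds that result into rt_mutex_setprio() to apply the boost.
 */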
285
286 struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
287 {
288         if (likely(!task_has_pi_waiters(task)))
289                 return NULL;
290
291         return task_top_pi_waiter(task)->task;
292 }
293
294 /*
295  * Called by sched_setscheduler() to get the priority which will be
296  * effective after the change.
297  */
298 int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
299 {
300         if (!task_has_pi_waiters(task))
301                 return newprio;
302
303         if (task_top_pi_waiter(task)->task->prio <= newprio)
304                 return task_top_pi_waiter(task)->task->prio;
305         return newprio;
306 }
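/*
 * Example (editor's addition): if sched_setscheduler() would move the
 * task to newprio 120 while its top pi waiter runs at prio 98, the boost
 * is kept and 98 is returned. If the requested priority beats every
 * waiter (newprio 90 vs. waiter prio 98), newprio itself is returned.
 */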
307
308 /*
309  * Adjust the priority of a task, after its pi_waiters got modified.
310  *
311  * This can be both boosting and unboosting. task->pi_lock must be held.
312  */
313 static void __rt_mutex_adjust_prio(struct task_struct *task)
314 {
315         int prio = rt_mutex_getprio(task);
316
317         if (task->prio != prio || dl_prio(prio))
318                 rt_mutex_setprio(task, prio);
319 }
320
321 /*
322  * Adjust task priority (undo boosting). Called from the exit path of
323  * rt_mutex_slowunlock() and rt_mutex_slowlock().
324  *
325  * (Note: We do this outside of the protection of lock->wait_lock to
326  * allow the lock to be taken while or before we readjust the priority
327  * of task. We do not use the spin_xx_mutex() variants here as we are
328  * outside of the debug path.)
329  */
330 void rt_mutex_adjust_prio(struct task_struct *task)
331 {
332         unsigned long flags;
333
334         raw_spin_lock_irqsave(&task->pi_lock, flags);
335         __rt_mutex_adjust_prio(task);
336         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
337 }
338
339 /*
340  * Deadlock detection is conditional:
341  *
342  * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
343  * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
344  *
345  * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
346  * conducted independent of the detect argument.
347  *
348  * If the waiter argument is NULL this indicates the deboost path and
349  * deadlock detection is disabled independent of the detect argument
350  * and the config settings.
351  */
352 static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
353                                           enum rtmutex_chainwalk chwalk)
354 {
355         /*
356          * This is just a wrapper function for the following call,
357          * because debug_rt_mutex_detect_deadlock() smells like a magic
358          * debug feature and I wanted to keep the cond function in the
359          * main source file along with the comments instead of having
360          * two of the same in the headers.
361          */
362         return debug_rt_mutex_detect_deadlock(waiter, chwalk);
363 }
364
365 static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter)
366 {
367         if (waiter->savestate)
368                 wake_up_lock_sleeper(waiter->task);
369         else
370                 wake_up_process(waiter->task);
371 }
372
373 /*
374  * Max number of times we'll walk the boosting chain:
375  */
376 int max_lock_depth = 1024;
377
378 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
379 {
380         return rt_mutex_real_waiter(p->pi_blocked_on) ?
381                 p->pi_blocked_on->lock : NULL;
382 }
383
384 /*
385  * Adjust the priority chain. Also used for deadlock detection.
386  * Decreases task's usage by one - may thus free the task.
387  *
388  * @task:       the task owning the mutex (owner) for which a chain walk is
389  *              probably needed
390  * @chwalk:     do we have to carry out deadlock detection?
391  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
392  *              things for a task that has just got its priority adjusted, and
393  *              is waiting on a mutex)
394  * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
395  *              we dropped its pi_lock. Is never dereferenced, only used for
396  *              comparison to detect lock chain changes.
397  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
398  *              its priority to the mutex owner (can be NULL in the case
399  *              depicted above or if the top waiter has gone away and we are
400  *              actually deboosting the owner)
401  * @top_task:   the current top waiter
402  *
403  * Returns 0 or -EDEADLK.
404  *
405  * Chain walk basics and protection scope
406  *
407  * [R] refcount on task
408  * [P] task->pi_lock held
409  * [L] rtmutex->wait_lock held
410  *
411  * Step Description                             Protected by
412  *      function arguments:
413  *      @task                                   [R]
414  *      @orig_lock if != NULL                   @top_task is blocked on it
415  *      @next_lock                              Unprotected. Cannot be
416  *                                              dereferenced. Only used for
417  *                                              comparison.
418  *      @orig_waiter if != NULL                 @top_task is blocked on it
419  *      @top_task                               current, or in case of proxy
420  *                                              locking protected by calling
421  *                                              code
422  *      again:
423  *        loop_sanity_check();
424  *      retry:
425  * [1]    lock(task->pi_lock);                  [R] acquire [P]
426  * [2]    waiter = task->pi_blocked_on;         [P]
427  * [3]    check_exit_conditions_1();            [P]
428  * [4]    lock = waiter->lock;                  [P]
429  * [5]    if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
430  *          unlock(task->pi_lock);              release [P]
431  *          goto retry;
432  *        }
433  * [6]    check_exit_conditions_2();            [P] + [L]
434  * [7]    requeue_lock_waiter(lock, waiter);    [P] + [L]
435  * [8]    unlock(task->pi_lock);                release [P]
436  *        put_task_struct(task);                release [R]
437  * [9]    check_exit_conditions_3();            [L]
438  * [10]   task = owner(lock);                   [L]
439  *        get_task_struct(task);                [L] acquire [R]
440  *        lock(task->pi_lock);                  [L] acquire [P]
441  * [11]   requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
442  * [12]   check_exit_conditions_4();            [P] + [L]
443  * [13]   unlock(task->pi_lock);                release [P]
444  *        unlock(lock->wait_lock);              release [L]
445  *        goto again;
446  */
447 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
448                                       enum rtmutex_chainwalk chwalk,
449                                       struct rt_mutex *orig_lock,
450                                       struct rt_mutex *next_lock,
451                                       struct rt_mutex_waiter *orig_waiter,
452                                       struct task_struct *top_task)
453 {
454         struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
455         struct rt_mutex_waiter *prerequeue_top_waiter;
456         int ret = 0, depth = 0;
457         struct rt_mutex *lock;
458         bool detect_deadlock;
459         bool requeue = true;
460
461         detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
462
463         /*
464          * The (de)boosting is a step by step approach with a lot of
465  * pitfalls. We want this to be preemptible and we want to hold a
466          * maximum of two locks per step. So we have to check
467          * carefully whether things change under us.
468          */
469  again:
470         /*
471          * We limit the lock chain length for each invocation.
472          */
473         if (++depth > max_lock_depth) {
474                 static int prev_max;
475
476                 /*
477                  * Print this only once. If the admin changes the limit,
478                  * print a new message when reaching the limit again.
479                  */
480                 if (prev_max != max_lock_depth) {
481                         prev_max = max_lock_depth;
482                         printk(KERN_WARNING "Maximum lock depth %d reached "
483                                "task: %s (%d)\n", max_lock_depth,
484                                top_task->comm, task_pid_nr(top_task));
485                 }
486                 put_task_struct(task);
487
488                 return -EDEADLK;
489         }
490
491         /*
492          * We are fully preemptible here and only hold the refcount on
493          * @task. So everything can have changed under us since the
494          * caller or our own code below (goto retry/again) dropped all
495          * locks.
496          */
497  retry:
498         /*
499          * [1] Task cannot go away as we did a get_task() before !
500          */
501         raw_spin_lock_irq(&task->pi_lock);
502
503         /*
504  * [2] Get the waiter on which @task is blocked.
505          */
506         waiter = task->pi_blocked_on;
507
508         /*
509          * [3] check_exit_conditions_1() protected by task->pi_lock.
510          */
511
512         /*
513          * Check whether the end of the boosting chain has been
514          * reached or the state of the chain has changed while we
515          * dropped the locks.
516          */
517         if (!rt_mutex_real_waiter(waiter))
518                 goto out_unlock_pi;
519
520         /*
521          * Check the orig_waiter state. After we dropped the locks,
522          * the previous owner of the lock might have released the lock.
523          */
524         if (orig_waiter && !rt_mutex_owner(orig_lock))
525                 goto out_unlock_pi;
526
527         /*
528          * We dropped all locks after taking a refcount on @task, so
529          * the task might have moved on in the lock chain or even left
530          * the chain completely and blocks now on an unrelated lock or
531          * on @orig_lock.
532          *
533          * We stored the lock on which @task was blocked in @next_lock,
534          * so we can detect the chain change.
535          */
536         if (next_lock != waiter->lock)
537                 goto out_unlock_pi;
538
539         /*
540  * Drop out when the task has no waiters. Note that
541  * top_waiter can be NULL when we are in the deboosting
542          * mode!
543          */
544         if (top_waiter) {
545                 if (!task_has_pi_waiters(task))
546                         goto out_unlock_pi;
547                 /*
548                  * If deadlock detection is off, we stop here if we
549                  * are not the top pi waiter of the task. If deadlock
550                  * detection is enabled we continue, but stop the
551                  * requeueing in the chain walk.
552                  */
553                 if (top_waiter != task_top_pi_waiter(task)) {
554                         if (!detect_deadlock)
555                                 goto out_unlock_pi;
556                         else
557                                 requeue = false;
558                 }
559         }
560
561         /*
562          * If the waiter priority is the same as the task priority
563          * then there is no further priority adjustment necessary.  If
564  * deadlock detection is off, we stop the chain walk. If it's
565          * enabled we continue, but stop the requeueing in the chain
566          * walk.
567          */
568         if (waiter->prio == task->prio) {
569                 if (!detect_deadlock)
570                         goto out_unlock_pi;
571                 else
572                         requeue = false;
573         }
574
575         /*
576          * [4] Get the next lock
577          */
578         lock = waiter->lock;
579         /*
580          * [5] We need to trylock here as we are holding task->pi_lock,
581          * which is the reverse lock order versus the other rtmutex
582          * operations.
583          */
584         if (!raw_spin_trylock(&lock->wait_lock)) {
585                 raw_spin_unlock_irq(&task->pi_lock);
586                 cpu_relax();
587                 goto retry;
588         }
589
590         /*
591          * [6] check_exit_conditions_2() protected by task->pi_lock and
592          * lock->wait_lock.
593          *
594          * Deadlock detection. If the lock is the same as the original
595          * lock which caused us to walk the lock chain or if the
596          * current lock is owned by the task which initiated the chain
597          * walk, we detected a deadlock.
598          */
599         if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
600                 debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
601                 raw_spin_unlock(&lock->wait_lock);
602                 ret = -EDEADLK;
603                 goto out_unlock_pi;
604         }
605
606         /*
607          * If we just follow the lock chain for deadlock detection, no
608          * need to do all the requeue operations. To avoid a truckload
609          * of conditionals around the various places below, just do the
610          * minimum chain walk checks.
611          */
612         if (!requeue) {
613                 /*
614  * No requeue [7] here. Just release @task [8].
615                  */
616                 raw_spin_unlock(&task->pi_lock);
617                 put_task_struct(task);
618
619                 /*
620                  * [9] check_exit_conditions_3 protected by lock->wait_lock.
621                  * If there is no owner of the lock, end of chain.
622                  */
623                 if (!rt_mutex_owner(lock)) {
624                         raw_spin_unlock_irq(&lock->wait_lock);
625                         return 0;
626                 }
627
628                 /* [10] Grab the next task, i.e. owner of @lock */
629                 task = rt_mutex_owner(lock);
630                 get_task_struct(task);
631                 raw_spin_lock(&task->pi_lock);
632
633                 /*
634                  * No requeue [11] here. We just do deadlock detection.
635                  *
636                  * [12] Store whether owner is blocked
637                  * itself. Decision is made after dropping the locks
638                  */
639                 next_lock = task_blocked_on_lock(task);
640                 /*
641                  * Get the top waiter for the next iteration
642                  */
643                 top_waiter = rt_mutex_top_waiter(lock);
644
645                 /* [13] Drop locks */
646                 raw_spin_unlock(&task->pi_lock);
647                 raw_spin_unlock_irq(&lock->wait_lock);
648
649                 /* If owner is not blocked, end of chain. */
650                 if (!next_lock)
651                         goto out_put_task;
652                 goto again;
653         }
654
655         /*
656          * Store the current top waiter before doing the requeue
657          * operation on @lock. We need it for the boost/deboost
658          * decision below.
659          */
660         prerequeue_top_waiter = rt_mutex_top_waiter(lock);
661
662         /* [7] Requeue the waiter in the lock waiter tree. */
663         rt_mutex_dequeue(lock, waiter);
664         waiter->prio = task->prio;
665         rt_mutex_enqueue(lock, waiter);
666
667         /* [8] Release the task */
668         raw_spin_unlock(&task->pi_lock);
669         put_task_struct(task);
670
671         /*
672          * [9] check_exit_conditions_3 protected by lock->wait_lock.
673          *
674          * We must abort the chain walk if there is no lock owner even
675  * in the deadlock detection case, as we have nothing to
676          * follow here. This is the end of the chain we are walking.
677          */
678         if (!rt_mutex_owner(lock)) {
679                 struct rt_mutex_waiter *lock_top_waiter;
680
681                 /*
682                  * If the requeue [7] above changed the top waiter,
683                  * then we need to wake the new top waiter up to try
684                  * to get the lock.
685                  */
686                 lock_top_waiter = rt_mutex_top_waiter(lock);
687                 if (prerequeue_top_waiter != lock_top_waiter)
688                         rt_mutex_wake_waiter(lock_top_waiter);
689                 raw_spin_unlock_irq(&lock->wait_lock);
690                 return 0;
691         }
692
693         /* [10] Grab the next task, i.e. the owner of @lock */
694         task = rt_mutex_owner(lock);
695         get_task_struct(task);
696         raw_spin_lock(&task->pi_lock);
697
698         /* [11] requeue the pi waiters if necessary */
699         if (waiter == rt_mutex_top_waiter(lock)) {
700                 /*
701                  * The waiter became the new top (highest priority)
702                  * waiter on the lock. Replace the previous top waiter
703  * in the owner task's pi waiters tree with this waiter
704                  * and adjust the priority of the owner.
705                  */
706                 rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
707                 rt_mutex_enqueue_pi(task, waiter);
708                 __rt_mutex_adjust_prio(task);
709
710         } else if (prerequeue_top_waiter == waiter) {
711                 /*
712                  * The waiter was the top waiter on the lock, but is
713  * no longer the top priority waiter. Replace waiter in
714  * the owner task's pi waiters tree with the new top
715                  * (highest priority) waiter and adjust the priority
716                  * of the owner.
717                  * The new top waiter is stored in @waiter so that
718                  * @waiter == @top_waiter evaluates to true below and
719                  * we continue to deboost the rest of the chain.
720                  */
721                 rt_mutex_dequeue_pi(task, waiter);
722                 waiter = rt_mutex_top_waiter(lock);
723                 rt_mutex_enqueue_pi(task, waiter);
724                 __rt_mutex_adjust_prio(task);
725         } else {
726                 /*
727                  * Nothing changed. No need to do any priority
728                  * adjustment.
729                  */
730         }
731
732         /*
733          * [12] check_exit_conditions_4() protected by task->pi_lock
734          * and lock->wait_lock. The actual decisions are made after we
735          * dropped the locks.
736          *
737          * Check whether the task which owns the current lock is pi
738          * blocked itself. If yes we store a pointer to the lock for
739          * the lock chain change detection above. After we dropped
740          * task->pi_lock next_lock cannot be dereferenced anymore.
741          */
742         next_lock = task_blocked_on_lock(task);
743         /*
744          * Store the top waiter of @lock for the end of chain walk
745          * decision below.
746          */
747         top_waiter = rt_mutex_top_waiter(lock);
748
749         /* [13] Drop the locks */
750         raw_spin_unlock(&task->pi_lock);
751         raw_spin_unlock_irq(&lock->wait_lock);
752
753         /*
754          * Make the actual exit decisions [12], based on the stored
755          * values.
756          *
757          * We reached the end of the lock chain. Stop right here. No
758          * point to go back just to figure that out.
759          */
760         if (!next_lock)
761                 goto out_put_task;
762
763         /*
764          * If the current waiter is not the top waiter on the lock,
765          * then we can stop the chain walk here if we are not in full
766          * deadlock detection mode.
767          */
768         if (!detect_deadlock && waiter != top_waiter)
769                 goto out_put_task;
770
771         goto again;
772
773  out_unlock_pi:
774         raw_spin_unlock_irq(&task->pi_lock);
775  out_put_task:
776         put_task_struct(task);
777
778         return ret;
779 }
780
781
782 #define STEAL_NORMAL  0
783 #define STEAL_LATERAL 1
784
785 /*
786  * Note that RT tasks are excluded from lateral-steals to prevent the
787  * introduction of an unbounded latency.
788  */
789 static inline int lock_is_stealable(struct task_struct *task,
790                                     struct task_struct *pendowner, int mode)
791 {
792         if (mode == STEAL_NORMAL || rt_task(task)) {
793                 if (task->prio >= pendowner->prio)
794                         return 0;
795         } else if (task->prio > pendowner->prio)
796                 return 0;
797         return 1;
798 }
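/*
 * Example (editor's addition): with STEAL_LATERAL two non-RT tasks of
 * equal ->prio may trade the lock, because the equal-priority case only
 * fails the check above for STEAL_NORMAL or for rt_task() callers. An
 * RT task, or any caller using STEAL_NORMAL, must be strictly higher
 * priority (numerically lower ->prio) than the pending owner to steal.
 */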
799
800 /*
801  * Try to take an rt-mutex
802  *
803  * Must be called with lock->wait_lock held and interrupts disabled
804  *
805  * @lock:   The lock to be acquired.
806  * @task:   The task which wants to acquire the lock
807  * @waiter: The waiter that is queued to the lock's wait tree if the
808  *          callsite called task_blocked_on_lock(), otherwise NULL
809  */
810 static int __try_to_take_rt_mutex(struct rt_mutex *lock,
811                                   struct task_struct *task,
812                                   struct rt_mutex_waiter *waiter, int mode)
813 {
814         /*
815          * Before testing whether we can acquire @lock, we set the
816          * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
817          * other tasks which try to modify @lock into the slow path
818          * and they serialize on @lock->wait_lock.
819          *
820          * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
821          * as explained at the top of this file if and only if:
822          *
823          * - There is a lock owner. The caller must fixup the
824          *   transient state if it does a trylock or leaves the lock
825          *   function due to a signal or timeout.
826          *
827          * - @task acquires the lock and there are no other
828          *   waiters. This is undone in rt_mutex_set_owner(@task) at
829          *   the end of this function.
830          */
831         mark_rt_mutex_waiters(lock);
832
833         /*
834          * If @lock has an owner, give up.
835          */
836         if (rt_mutex_owner(lock))
837                 return 0;
838
839         /*
840          * If @waiter != NULL, @task has already enqueued the waiter
841          * into @lock waiter tree. If @waiter == NULL then this is a
842          * trylock attempt.
843          */
844         if (waiter) {
845                 /*
846                  * If waiter is not the highest priority waiter of
847                  * @lock, give up.
848                  */
849                 if (waiter != rt_mutex_top_waiter(lock)) {
850                         /* XXX lock_is_stealable() ? */
851                         return 0;
852                 }
853
854                 /*
855                  * We can acquire the lock. Remove the waiter from the
856                  * lock waiters tree.
857                  */
858                 rt_mutex_dequeue(lock, waiter);
859
860         } else {
861                 /*
862                  * If the lock has waiters already we check whether @task is
863                  * eligible to take over the lock.
864                  *
865                  * If there are no other waiters, @task can acquire
866                  * the lock.  @task->pi_blocked_on is NULL, so it does
867                  * not need to be dequeued.
868                  */
869                 if (rt_mutex_has_waiters(lock)) {
870                         struct task_struct *pown = rt_mutex_top_waiter(lock)->task;
871
872                         if (task != pown && !lock_is_stealable(task, pown, mode))
873                                 return 0;
874                         /*
875                          * The current top waiter stays enqueued. We
876                          * don't have to change anything in the lock
877                          * waiters order.
878                          */
879                 } else {
880                         /*
881                          * No waiters. Take the lock without the
882  * pi_lock dance. @task->pi_blocked_on is NULL
883                          * and we have no waiters to enqueue in @task
884                          * pi waiters tree.
885                          */
886                         goto takeit;
887                 }
888         }
889
890         /*
891          * Clear @task->pi_blocked_on. Requires protection by
892          * @task->pi_lock. Redundant operation for the @waiter == NULL
893          * case, but conditionals are more expensive than a redundant
894          * store.
895          */
896         raw_spin_lock(&task->pi_lock);
897         task->pi_blocked_on = NULL;
898         /*
899          * Finish the lock acquisition. @task is the new owner. If
900          * other waiters exist we have to insert the highest priority
901          * waiter into @task->pi_waiters tree.
902          */
903         if (rt_mutex_has_waiters(lock))
904                 rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
905         raw_spin_unlock(&task->pi_lock);
906
907 takeit:
908         /* We got the lock. */
909         debug_rt_mutex_lock(lock);
910
911         /*
912          * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
913          * are still waiters or clears it.
914          */
915         rt_mutex_set_owner(lock, task);
916
917         rt_mutex_deadlock_account_lock(lock, task);
918
919         return 1;
920 }
921
922 #ifdef CONFIG_PREEMPT_RT_FULL
923 /*
924  * preemptible spin_lock functions:
925  */
926 static inline void rt_spin_lock_fastlock(struct rt_mutex *lock,
927                                          void  (*slowfn)(struct rt_mutex *lock,
928                                                          bool mg_off),
929                                          bool do_mig_dis)
930 {
931         might_sleep_no_state_check();
932
933         if (do_mig_dis)
934                 migrate_disable();
935
936         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
937                 rt_mutex_deadlock_account_lock(lock, current);
938         else
939                 slowfn(lock, do_mig_dis);
940 }
941
942 static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock,
943                                            void  (*slowfn)(struct rt_mutex *lock))
944 {
945         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
946                 rt_mutex_deadlock_account_unlock(current);
947         else
948                 slowfn(lock);
949 }
950 #ifdef CONFIG_SMP
951 /*
952  * Note that owner is a speculative pointer and dereferencing relies
953  * on rcu_read_lock() and the check against the lock owner.
954  */
955 static int adaptive_wait(struct rt_mutex *lock,
956                          struct task_struct *owner)
957 {
958         int res = 0;
959
960         rcu_read_lock();
961         for (;;) {
962                 if (owner != rt_mutex_owner(lock))
963                         break;
964                 /*
965                  * Ensure that owner->on_cpu is dereferenced _after_
966                  * checking the above to be valid.
967                  */
968                 barrier();
969                 if (!owner->on_cpu) {
970                         res = 1;
971                         break;
972                 }
973                 cpu_relax();
974         }
975         rcu_read_unlock();
976         return res;
977 }
978 #else
979 static int adaptive_wait(struct rt_mutex *lock,
980                          struct task_struct *orig_owner)
981 {
982         return 1;
983 }
984 #endif
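/*
 * Editor's note: this is the "adaptive" part of the adaptive spinlocks
 * credited at the top of the file. The slow path below effectively does
 *
 *      if (we are the top waiter && the owner runs on another CPU)
 *              spin (the cpu_relax() loop above);
 *      else
 *              schedule();
 *
 * so a short critical section held on another CPU is waited out without
 * paying for a sleep/wakeup round trip.
 */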
985
986 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
987                                    struct rt_mutex_waiter *waiter,
988                                    struct task_struct *task,
989                                    enum rtmutex_chainwalk chwalk);
990 /*
991  * Slow path lock function spin_lock style: this variant is very
992  * careful not to miss any non-lock wakeups.
993  *
994  * We store the current state under p->pi_lock in p->saved_state and
995  * the try_to_wake_up() code handles this accordingly.
996  */
997 static void  noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock,
998                                                     bool mg_off)
999 {
1000         struct task_struct *lock_owner, *self = current;
1001         struct rt_mutex_waiter waiter, *top_waiter;
1002         unsigned long flags;
1003         int ret;
1004
1005         rt_mutex_init_waiter(&waiter, true);
1006
1007         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1008
1009         if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) {
1010                 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1011                 return;
1012         }
1013
1014         BUG_ON(rt_mutex_owner(lock) == self);
1015
1016         /*
1017          * We save whatever state the task is in and we'll restore it
1018  * after acquiring the lock, taking real wakeups into account
1019          * as well. We are serialized via pi_lock against wakeups. See
1020          * try_to_wake_up().
1021          */
1022         raw_spin_lock(&self->pi_lock);
1023         self->saved_state = self->state;
1024         __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
1025         raw_spin_unlock(&self->pi_lock);
1026
1027         ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK);
1028         BUG_ON(ret);
1029
1030         for (;;) {
1031                 /* Try to acquire the lock again. */
1032                 if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL))
1033                         break;
1034
1035                 top_waiter = rt_mutex_top_waiter(lock);
1036                 lock_owner = rt_mutex_owner(lock);
1037
1038                 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1039
1040                 debug_rt_mutex_print_deadlock(&waiter);
1041
1042                 if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) {
1043                         if (mg_off)
1044                                 migrate_enable();
1045                         schedule();
1046                         if (mg_off)
1047                                 migrate_disable();
1048                 }
1049
1050                 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1051
1052                 raw_spin_lock(&self->pi_lock);
1053                 __set_current_state_no_track(TASK_UNINTERRUPTIBLE);
1054                 raw_spin_unlock(&self->pi_lock);
1055         }
1056
1057         /*
1058          * Restore the task state to current->saved_state. We set it
1059          * to the original state above and the try_to_wake_up() code
1060          * has possibly updated it when a real (non-rtmutex) wakeup
1061          * happened while we were blocked. Clear saved_state so
1062  * try_to_wake_up() does not get confused.
1063          */
1064         raw_spin_lock(&self->pi_lock);
1065         __set_current_state_no_track(self->saved_state);
1066         self->saved_state = TASK_RUNNING;
1067         raw_spin_unlock(&self->pi_lock);
1068
1069         /*
1070          * try_to_take_rt_mutex() sets the waiter bit
1071          * unconditionally. We might have to fix that up:
1072          */
1073         fixup_rt_mutex_waiters(lock);
1074
1075         BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock));
1076         BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry));
1077
1078         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1079
1080         debug_rt_mutex_free_waiter(&waiter);
1081 }
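/*
 * Editor's sketch of the saved_state dance above (not part of the
 * original source):
 *
 *      saved_state = state;            e.g. TASK_INTERRUPTIBLE
 *      state = TASK_UNINTERRUPTIBLE;   block on the rtmutex
 *      ...                             a real wakeup may set saved_state
 *                                      to TASK_RUNNING in the meantime
 *      state = saved_state;            restored once the lock is held
 *      saved_state = TASK_RUNNING;
 *
 * A non-rtmutex wakeup that arrives while the task sleeps on the
 * spinlock is therefore not lost; it is replayed when the slow path
 * exits.
 */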
1082
1083 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
1084                                     struct wake_q_head *wake_sleeper_q,
1085                                     struct rt_mutex *lock);
1086 /*
1087  * Slow path to release a rt_mutex spin_lock style
1088  */
1089 static void  noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock)
1090 {
1091         unsigned long flags;
1092         WAKE_Q(wake_q);
1093         WAKE_Q(wake_sleeper_q);
1094
1095         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1096
1097         debug_rt_mutex_unlock(lock);
1098
1099         rt_mutex_deadlock_account_unlock(current);
1100
1101         if (!rt_mutex_has_waiters(lock)) {
1102                 lock->owner = NULL;
1103                 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1104                 return;
1105         }
1106
1107         mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock);
1108
1109         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1110         wake_up_q(&wake_q);
1111         wake_up_q_sleeper(&wake_sleeper_q);
1112
1113         /* Undo pi boosting when necessary */
1114         rt_mutex_adjust_prio(current);
1115 }
1116
1117 void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock)
1118 {
1119         rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false);
1120         spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
1121 }
1122 EXPORT_SYMBOL(rt_spin_lock__no_mg);
1123
1124 void __lockfunc rt_spin_lock(spinlock_t *lock)
1125 {
1126         rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
1127         spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
1128 }
1129 EXPORT_SYMBOL(rt_spin_lock);
1130
1131 void __lockfunc __rt_spin_lock(struct rt_mutex *lock)
1132 {
1133         rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true);
1134 }
1135 EXPORT_SYMBOL(__rt_spin_lock);
1136
1137 void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock)
1138 {
1139         rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false);
1140 }
1141 EXPORT_SYMBOL(__rt_spin_lock__no_mg);
1142
1143 #ifdef CONFIG_DEBUG_LOCK_ALLOC
1144 void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
1145 {
1146         spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
1147         rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true);
1148 }
1149 EXPORT_SYMBOL(rt_spin_lock_nested);
1150 #endif
1151
1152 void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock)
1153 {
1154         /* NOTE: we always pass in '1' for nested, for simplicity */
1155         spin_release(&lock->dep_map, 1, _RET_IP_);
1156         rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
1157 }
1158 EXPORT_SYMBOL(rt_spin_unlock__no_mg);
1159
1160 void __lockfunc rt_spin_unlock(spinlock_t *lock)
1161 {
1162         /* NOTE: we always pass in '1' for nested, for simplicity */
1163         spin_release(&lock->dep_map, 1, _RET_IP_);
1164         rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
1165         migrate_enable();
1166 }
1167 EXPORT_SYMBOL(rt_spin_unlock);
1168
1169 void __lockfunc __rt_spin_unlock(struct rt_mutex *lock)
1170 {
1171         rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock);
1172 }
1173 EXPORT_SYMBOL(__rt_spin_unlock);
1174
1175 /*
1176  * Wait for the lock to get unlocked: instead of polling for an unlock
1177  * (like raw spinlocks do), we lock and unlock, to force the kernel to
1178  * schedule if there's contention:
1179  */
1180 void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
1181 {
1182         spin_lock(lock);
1183         spin_unlock(lock);
1184 }
1185 EXPORT_SYMBOL(rt_spin_unlock_wait);
1186
1187 int __lockfunc __rt_spin_trylock(struct rt_mutex *lock)
1188 {
1189         return rt_mutex_trylock(lock);
1190 }
1191
1192 int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock)
1193 {
1194         int ret;
1195
1196         ret = rt_mutex_trylock(&lock->lock);
1197         if (ret)
1198                 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
1199         return ret;
1200 }
1201 EXPORT_SYMBOL(rt_spin_trylock__no_mg);
1202
1203 int __lockfunc rt_spin_trylock(spinlock_t *lock)
1204 {
1205         int ret;
1206
1207         migrate_disable();
1208         ret = rt_mutex_trylock(&lock->lock);
1209         if (ret)
1210                 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
1211         else
1212                 migrate_enable();
1213         return ret;
1214 }
1215 EXPORT_SYMBOL(rt_spin_trylock);
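/*
 * Usage sketch (editor's addition, hypothetical caller and lock names):
 * the trylock variants mirror the !RT spinlock API, so a caller can
 * opportunistically take a lock and back off on contention:
 *
 *      if (rt_spin_trylock(&stats_lock)) {
 *              update_stats();
 *              rt_spin_unlock(&stats_lock);
 *      }
 *
 * On success migration stays disabled (see above) and rt_spin_unlock()
 * re-enables it, so the two calls must be used as a pair.
 */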
1216
1217 int __lockfunc rt_spin_trylock_bh(spinlock_t *lock)
1218 {
1219         int ret;
1220
1221         local_bh_disable();
1222         ret = rt_mutex_trylock(&lock->lock);
1223         if (ret) {
1224                 migrate_disable();
1225                 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
1226         } else
1227                 local_bh_enable();
1228         return ret;
1229 }
1230 EXPORT_SYMBOL(rt_spin_trylock_bh);
1231
1232 int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
1233 {
1234         int ret;
1235
1236         *flags = 0;
1237         ret = rt_mutex_trylock(&lock->lock);
1238         if (ret) {
1239                 migrate_disable();
1240                 spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
1241         }
1242         return ret;
1243 }
1244 EXPORT_SYMBOL(rt_spin_trylock_irqsave);
1245
1246 int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock)
1247 {
1248         /* Subtract 1 from counter unless that drops it to 0 (i.e. it was 1) */
1249         if (atomic_add_unless(atomic, -1, 1))
1250                 return 0;
1251         rt_spin_lock(lock);
1252         if (atomic_dec_and_test(atomic))
1253                 return 1;
1254         rt_spin_unlock(lock);
1255         return 0;
1256 }
1257 EXPORT_SYMBOL(atomic_dec_and_spin_lock);
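/*
 * Usage sketch (editor's addition, hypothetical object and lock names):
 * this has the same semantics as atomic_dec_and_lock(), i.e. the classic
 * "free the object only when the last reference is dropped" pattern:
 *
 *      if (atomic_dec_and_spin_lock(&obj->refcount, &obj_list_lock)) {
 *              list_del(&obj->node);
 *              rt_spin_unlock(&obj_list_lock);
 *              kfree(obj);
 *      }
 *
 * The lock is held on return only when the function returns 1, i.e.
 * only when the count actually dropped to zero.
 */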
1258
1259 void
1260 __rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key)
1261 {
1262 #ifdef CONFIG_DEBUG_LOCK_ALLOC
1263         /*
1264          * Make sure we are not reinitializing a held lock:
1265          */
1266         debug_check_no_locks_freed((void *)lock, sizeof(*lock));
1267         lockdep_init_map(&lock->dep_map, name, key, 0);
1268 #endif
1269 }
1270 EXPORT_SYMBOL(__rt_spin_lock_init);
1271
1272 #endif /* PREEMPT_RT_FULL */
1273
1274 #ifdef CONFIG_PREEMPT_RT_FULL
1275 static inline int __sched
1276 __mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
1277 {
1278         struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
1279         struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
1280
1281         if (!hold_ctx)
1282                 return 0;
1283
1284         if (unlikely(ctx == hold_ctx))
1285                 return -EALREADY;
1286
1287         if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
1288             (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
1289 #ifdef CONFIG_DEBUG_MUTEXES
1290                 DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
1291                 ctx->contending_lock = ww;
1292 #endif
1293                 return -EDEADLK;
1294         }
1295
1296         return 0;
1297 }
1298 #else
1299 static inline int __sched
1300 __mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
1301 {
1302         BUG();
1303         return 0;
1304 }
1305
1306 #endif
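/*
 * Example (editor's addition): ww_acquire_ctx stamps establish a global
 * age order. If context A (stamp 5) already holds ww->ctx and context B
 * (stamp 9) reaches this check while blocking on the same lock, B is the
 * younger context and gets -EDEADLK, the signal to drop its other
 * ww_mutexes and retry. Had B been older (stamp 3), the check passes and
 * B simply keeps waiting for A to finish.
 */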
1307
1308 static inline int
1309 try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
1310                      struct rt_mutex_waiter *waiter)
1311 {
1312         return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL);
1313 }
1314
1315 /*
1316  * Task blocks on lock.
1317  *
1318  * Prepare waiter and propagate pi chain
1319  *
1320  * This must be called with lock->wait_lock held and interrupts disabled
1321  */
1322 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
1323                                    struct rt_mutex_waiter *waiter,
1324                                    struct task_struct *task,
1325                                    enum rtmutex_chainwalk chwalk)
1326 {
1327         struct task_struct *owner = rt_mutex_owner(lock);
1328         struct rt_mutex_waiter *top_waiter = waiter;
1329         struct rt_mutex *next_lock;
1330         int chain_walk = 0, res;
1331
1332         /*
1333          * Early deadlock detection. We really don't want the task to
1334          * enqueue on itself just to untangle the mess later. It's not
1335          * only an optimization. We drop the locks, so another waiter
1336          * can come in before the chain walk detects the deadlock. So
1337          * the other will detect the deadlock and return -EDEADLOCK,
1338          * which is wrong, as the other waiter is not in a deadlock
1339          * situation.
1340          */
1341         if (owner == task)
1342                 return -EDEADLK;
1343
1344         raw_spin_lock(&task->pi_lock);
1345
1346         /*
1347          * In the case of futex requeue PI, this will be a proxy
1348  * lock. The task will wake unaware that it is enqueued on
1349          * this lock. Avoid blocking on two locks and corrupting
1350          * pi_blocked_on via the PI_WAKEUP_INPROGRESS
1351          * flag. futex_wait_requeue_pi() sets this when it wakes up
1352          * before requeue (due to a signal or timeout). Do not enqueue
1353          * the task if PI_WAKEUP_INPROGRESS is set.
1354          */
1355         if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) {
1356                 raw_spin_unlock(&task->pi_lock);
1357                 return -EAGAIN;
1358         }
1359
1360         BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on));
1361
1362         __rt_mutex_adjust_prio(task);
1363         waiter->task = task;
1364         waiter->lock = lock;
1365         waiter->prio = task->prio;
1366
1367         /* Get the top priority waiter on the lock */
1368         if (rt_mutex_has_waiters(lock))
1369                 top_waiter = rt_mutex_top_waiter(lock);
1370         rt_mutex_enqueue(lock, waiter);
1371
1372         task->pi_blocked_on = waiter;
1373
1374         raw_spin_unlock(&task->pi_lock);
1375
1376         if (!owner)
1377                 return 0;
1378
1379         raw_spin_lock(&owner->pi_lock);
1380         if (waiter == rt_mutex_top_waiter(lock)) {
1381                 rt_mutex_dequeue_pi(owner, top_waiter);
1382                 rt_mutex_enqueue_pi(owner, waiter);
1383
1384                 __rt_mutex_adjust_prio(owner);
1385                 if (rt_mutex_real_waiter(owner->pi_blocked_on))
1386                         chain_walk = 1;
1387         } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
1388                 chain_walk = 1;
1389         }
1390
1391         /* Store the lock on which owner is blocked or NULL */
1392         next_lock = task_blocked_on_lock(owner);
1393
1394         raw_spin_unlock(&owner->pi_lock);
1395         /*
1396          * Even if full deadlock detection is on, if the owner is not
1397          * blocked itself, we can avoid finding this out in the chain
1398          * walk.
1399          */
1400         if (!chain_walk || !next_lock)
1401                 return 0;
1402
1403         /*
1404          * The owner can't disappear while holding a lock,
1405          * so the owner struct is protected by wait_lock.
1406          * Gets dropped in rt_mutex_adjust_prio_chain()!
1407          */
1408         get_task_struct(owner);
1409
1410         raw_spin_unlock_irq(&lock->wait_lock);
1411
1412         res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
1413                                          next_lock, waiter, task);
1414
1415         raw_spin_lock_irq(&lock->wait_lock);
1416
1417         return res;
1418 }
1419
1420 /*
1421  * Remove the top waiter from the current tasks pi waiter tree and
1422  * queue it up.
1423  *
1424  * Called with lock->wait_lock held and interrupts disabled.
1425  */
1426 static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
1427                                     struct wake_q_head *wake_sleeper_q,
1428                                     struct rt_mutex *lock)
1429 {
1430         struct rt_mutex_waiter *waiter;
1431
1432         raw_spin_lock(&current->pi_lock);
1433
1434         waiter = rt_mutex_top_waiter(lock);
1435
1436         /*
1437          * Remove it from current->pi_waiters. We do not adjust a
1438          * possible priority boost right now. We execute wakeup in the
1439          * boosted mode and go back to normal after releasing
1440          * lock->wait_lock.
1441          */
1442         rt_mutex_dequeue_pi(current, waiter);
1443
1444         /*
1445          * As we are waking up the top waiter, and the waiter stays
1446          * queued on the lock until it gets the lock, this lock
1447          * obviously has waiters. Just set the bit here and this has
1448          * the added benefit of forcing all new tasks into the
1449          * slow path making sure no task of lower priority than
1450          * the top waiter can steal this lock.
1451          */
1452         lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1453
1454         raw_spin_unlock(&current->pi_lock);
1455
1456         if (waiter->savestate)
1457                 wake_q_add(wake_sleeper_q, waiter->task);
1458         else
1459                 wake_q_add(wake_q, waiter->task);
1460 }
1461
1462 /*
1463  * Remove a waiter from a lock and give up
1464  *
1465  * Must be called with lock->wait_lock held and interrupts disabled. The
1466  * caller must have just failed try_to_take_rt_mutex().
1467  */
1468 static void remove_waiter(struct rt_mutex *lock,
1469                           struct rt_mutex_waiter *waiter)
1470 {
1471         bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1472         struct task_struct *owner = rt_mutex_owner(lock);
1473         struct rt_mutex *next_lock = NULL;
1474
1475         raw_spin_lock(&current->pi_lock);
1476         rt_mutex_dequeue(lock, waiter);
1477         current->pi_blocked_on = NULL;
1478         raw_spin_unlock(&current->pi_lock);
1479
1480         /*
1481          * Only update priority if the waiter was the highest priority
1482          * waiter of the lock and there is an owner to update.
1483          */
1484         if (!owner || !is_top_waiter)
1485                 return;
1486
1487         raw_spin_lock(&owner->pi_lock);
1488
1489         rt_mutex_dequeue_pi(owner, waiter);
1490
1491         if (rt_mutex_has_waiters(lock))
1492                 rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1493
1494         __rt_mutex_adjust_prio(owner);
1495
1496         /* Store the lock on which owner is blocked or NULL */
1497         if (rt_mutex_real_waiter(owner->pi_blocked_on))
1498                 next_lock = task_blocked_on_lock(owner);
1499
1500         raw_spin_unlock(&owner->pi_lock);
1501
1502         /*
1503          * Don't walk the chain, if the owner task is not blocked
1504          * itself.
1505          */
1506         if (!next_lock)
1507                 return;
1508
1509         /* gets dropped in rt_mutex_adjust_prio_chain()! */
1510         get_task_struct(owner);
1511
1512         raw_spin_unlock_irq(&lock->wait_lock);
1513
1514         rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1515                                    next_lock, NULL, current);
1516
1517         raw_spin_lock_irq(&lock->wait_lock);
1518 }
1519
1520 /*
1521  * Recheck the pi chain, in case we got a priority setting
1522  *
1523  * Called from sched_setscheduler
1524  */
1525 void rt_mutex_adjust_pi(struct task_struct *task)
1526 {
1527         struct rt_mutex_waiter *waiter;
1528         struct rt_mutex *next_lock;
1529         unsigned long flags;
1530
1531         raw_spin_lock_irqsave(&task->pi_lock, flags);
1532
1533         waiter = task->pi_blocked_on;
1534         if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio &&
1535                         !dl_prio(task->prio))) {
1536                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
1537                 return;
1538         }
1539         next_lock = waiter->lock;
1540
1541         /* gets dropped in rt_mutex_adjust_prio_chain()! */
1542         get_task_struct(task);
1543
1544         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
1545         rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL,
1546                                    next_lock, NULL, task);
1547 }
1548
1549 /**
1550  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
1551  * @lock:                the rt_mutex to take
1552  * @state:               the state the task should block in (TASK_INTERRUPTIBLE
1553  *                       or TASK_UNINTERRUPTIBLE)
1554  * @timeout:             the pre-initialized and started timer, or NULL for none
1555  * @waiter:              the pre-initialized rt_mutex_waiter
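 * @ww_ctx:              the ww_acquire_ctx of a ww_mutex acquisition, or NULL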
1556  *
1557  * Must be called with lock->wait_lock held and interrupts disabled
1558  */
1559 static int __sched
1560 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
1561                     struct hrtimer_sleeper *timeout,
1562                     struct rt_mutex_waiter *waiter,
1563                     struct ww_acquire_ctx *ww_ctx)
1564 {
1565         int ret = 0;
1566
1567         for (;;) {
1568                 /* Try to acquire the lock: */
1569                 if (try_to_take_rt_mutex(lock, current, waiter))
1570                         break;
1571
1572                 /*
1573                  * TASK_INTERRUPTIBLE checks for signals and
1574                  * timeout. Ignored otherwise.
1575                  */
1576                 if (unlikely(state == TASK_INTERRUPTIBLE)) {
1577                         /* Signal pending? */
1578                         if (signal_pending(current))
1579                                 ret = -EINTR;
1580                         if (timeout && !timeout->task)
1581                                 ret = -ETIMEDOUT;
1582                         if (ret)
1583                                 break;
1584                 }
1585
1586                 if (ww_ctx && ww_ctx->acquired > 0) {
1587                         ret = __mutex_lock_check_stamp(lock, ww_ctx);
1588                         if (ret)
1589                                 break;
1590                 }
1591
1592                 raw_spin_unlock_irq(&lock->wait_lock);
1593
1594                 debug_rt_mutex_print_deadlock(waiter);
1595
1596                 schedule();
1597
1598                 raw_spin_lock_irq(&lock->wait_lock);
1599                 set_current_state(state);
1600         }
1601
1602         __set_current_state(TASK_RUNNING);
1603         return ret;
1604 }
1605
1606 static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
1607                                      struct rt_mutex_waiter *w)
1608 {
1609         /*
1610          * If the result is not -EDEADLOCK or the caller requested
1611          * deadlock detection, nothing to do here.
1612          */
1613         if (res != -EDEADLOCK || detect_deadlock)
1614                 return;
1615
1616         /*
1617          * Yell loudly and stop the task right here.
1618          */
1619         rt_mutex_print_deadlock(w);
1620         while (1) {
1621                 set_current_state(TASK_INTERRUPTIBLE);
1622                 schedule();
1623         }
1624 }
1625
1626 static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
1627                                                    struct ww_acquire_ctx *ww_ctx)
1628 {
1629 #ifdef CONFIG_DEBUG_MUTEXES
1630         /*
1631          * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
1632          * but released with a normal mutex_unlock in this call.
1633          *
1634          * This should never happen, always use ww_mutex_unlock.
1635          */
1636         DEBUG_LOCKS_WARN_ON(ww->ctx);
1637
1638         /*
1639          * Not quite done after calling ww_acquire_done() ?
1640          */
1641         DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
1642
1643         if (ww_ctx->contending_lock) {
1644                 /*
1645                  * After -EDEADLK you tried to
1646                  * acquire a different ww_mutex? Bad!
1647                  */
1648                 DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
1649
1650                 /*
1651                  * You called ww_mutex_lock after receiving -EDEADLK,
1652                  * but 'forgot' to unlock everything else first?
1653                  */
1654                 DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
1655                 ww_ctx->contending_lock = NULL;
1656         }
1657
1658         /*
1659          * Naughty, using a different class will lead to undefined behavior!
1660          */
1661         DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
1662 #endif
1663         ww_ctx->acquired++;
1664 }
1665
1666 #ifdef CONFIG_PREEMPT_RT_FULL
1667 static void ww_mutex_account_lock(struct rt_mutex *lock,
1668                                   struct ww_acquire_ctx *ww_ctx)
1669 {
1670         struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
1671         struct rt_mutex_waiter *waiter, *n;
1672
1673         /*
1674          * This branch gets optimized out for the common case,
1675          * and is only important for ww_mutex_lock.
1676          */
1677         ww_mutex_lock_acquired(ww, ww_ctx);
1678         ww->ctx = ww_ctx;
1679
1680         /*
1681          * Give any possible sleeping processes the chance to wake up,
1682          * so they can recheck if they have to back off.
1683          */
1684         rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
1685                                              tree_entry) {
1686                 /* XXX debug rt mutex waiter wakeup */
1687
1688                 BUG_ON(waiter->lock != lock);
1689                 rt_mutex_wake_waiter(waiter);
1690         }
1691 }
1692
1693 #else
1694
1695 static void ww_mutex_account_lock(struct rt_mutex *lock,
1696                                   struct ww_acquire_ctx *ww_ctx)
1697 {
1698         BUG();
1699 }
1700 #endif
1701
1702 /*
1703  * Slow path lock function:
1704  */
1705 static int __sched
1706 rt_mutex_slowlock(struct rt_mutex *lock, int state,
1707                   struct hrtimer_sleeper *timeout,
1708                   enum rtmutex_chainwalk chwalk,
1709                   struct ww_acquire_ctx *ww_ctx)
1710 {
1711         struct rt_mutex_waiter waiter;
1712         unsigned long flags;
1713         int ret = 0;
1714
1715         rt_mutex_init_waiter(&waiter, false);
1716
1717         /*
1718          * Technically we could use raw_spin_[un]lock_irq() here, but this can
1719          * be called in early boot if the cmpxchg() fast path is disabled
1720          * (debug, no architecture support). In this case we will acquire the
1721          * rtmutex with lock->wait_lock held. But we cannot unconditionally
1722          * enable interrupts in that early boot case. So we need to use the
1723          * irqsave/restore variants.
1724          */
1725         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1726
1727         /* Try to acquire the lock again: */
1728         if (try_to_take_rt_mutex(lock, current, NULL)) {
1729                 if (ww_ctx)
1730                         ww_mutex_account_lock(lock, ww_ctx);
1731                 raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1732                 return 0;
1733         }
1734
1735         set_current_state(state);
1736
1737         /* Set up the timer when timeout != NULL */
1738         if (unlikely(timeout))
1739                 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
1740
1741         ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
1742
1743         if (likely(!ret))
1744                 /* sleep on the mutex */
1745                 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
1746                                           ww_ctx);
1747         else if (ww_ctx) {
1748                 /* ww_mutex received EDEADLK, let it become EALREADY */
1749                 ret = __mutex_lock_check_stamp(lock, ww_ctx);
1750                 BUG_ON(!ret);
1751         }
1752
1753         if (unlikely(ret)) {
1754                 __set_current_state(TASK_RUNNING);
1755                 if (rt_mutex_has_waiters(lock))
1756                         remove_waiter(lock, &waiter);
1757                 /* ww_mutexes want to report EDEADLK/EALREADY, let them */
1758                 if (!ww_ctx)
1759                         rt_mutex_handle_deadlock(ret, chwalk, &waiter);
1760         } else if (ww_ctx) {
1761                 ww_mutex_account_lock(lock, ww_ctx);
1762         }
1763
1764         /*
1765          * try_to_take_rt_mutex() sets the waiter bit
1766          * unconditionally. We might have to fix that up.
1767          */
1768         fixup_rt_mutex_waiters(lock);
1769
1770         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1771
1772         /* Remove pending timer: */
1773         if (unlikely(timeout))
1774                 hrtimer_cancel(&timeout->timer);
1775
1776         debug_rt_mutex_free_waiter(&waiter);
1777
1778         return ret;
1779 }
1780
1781 /*
1782  * Slow path try-lock function:
1783  */
1784 static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
1785 {
1786         unsigned long flags;
1787         int ret;
1788
1789         /*
1790          * If the lock already has an owner we fail to get the lock.
1791          * This can be done without taking the @lock->wait_lock as
1792          * it is only being read, and this is a trylock anyway.
1793          */
1794         if (rt_mutex_owner(lock))
1795                 return 0;
1796
1797         /*
1798  * The mutex currently has no owner. Lock the wait lock and try to
1799          * acquire the lock. We use irqsave here to support early boot calls.
1800          */
1801         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1802
1803         ret = try_to_take_rt_mutex(lock, current, NULL);
1804
1805         /*
1806          * try_to_take_rt_mutex() sets the lock waiters bit
1807          * unconditionally. Clean this up.
1808          */
1809         fixup_rt_mutex_waiters(lock);
1810
1811         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1812
1813         return ret;
1814 }
1815
1816 /*
1817  * Slow path to release a rt-mutex.
1818  * Return whether the current task needs to undo a potential priority boosting.
1819  */
1820 static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
1821                                         struct wake_q_head *wake_q,
1822                                         struct wake_q_head *wake_sleeper_q)
1823 {
1824         unsigned long flags;
1825
1826         /* irqsave required to support early boot calls */
1827         raw_spin_lock_irqsave(&lock->wait_lock, flags);
1828
1829         debug_rt_mutex_unlock(lock);
1830
1831         rt_mutex_deadlock_account_unlock(current);
1832
1833         /*
1834          * We must be careful here if the fast path is enabled. If we
1835          * have no waiters queued we cannot set owner to NULL here
1836          * because of:
1837          *
1838          * foo->lock->owner = NULL;
1839          *                      rtmutex_lock(foo->lock);   <- fast path
1840          *                      free = atomic_dec_and_test(foo->refcnt);
1841          *                      rtmutex_unlock(foo->lock); <- fast path
1842          *                      if (free)
1843          *                              kfree(foo);
1844          * raw_spin_unlock(foo->lock->wait_lock);
1845          *
1846          * So for the fastpath enabled kernel:
1847          *
1848          * Nothing can set the waiters bit as long as we hold
1849          * lock->wait_lock. So we do the following sequence:
1850          *
1851          *      owner = rt_mutex_owner(lock);
1852          *      clear_rt_mutex_waiters(lock);
1853          *      raw_spin_unlock(&lock->wait_lock);
1854          *      if (cmpxchg(&lock->owner, owner, 0) == owner)
1855          *              return;
1856          *      goto retry;
1857          *
1858          * The fastpath disabled variant is simple as all access to
1859          * lock->owner is serialized by lock->wait_lock:
1860          *
1861          *      lock->owner = NULL;
1862          *      raw_spin_unlock(&lock->wait_lock);
1863          */
1864         while (!rt_mutex_has_waiters(lock)) {
1865                 /* Drops lock->wait_lock ! */
1866                 if (unlock_rt_mutex_safe(lock, flags) == true)
1867                         return false;
1868                 /* Relock the rtmutex and try again */
1869                 raw_spin_lock_irqsave(&lock->wait_lock, flags);
1870         }
1871
1872         /*
1873          * The wakeup next waiter path does not suffer from the above
1874          * race. See the comments there.
1875          *
1876          * Queue the next waiter for wakeup once we release the wait_lock.
1877          */
1878         mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock);
1879
1880         raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1881
1882         /* check PI boosting */
1883         return true;
1884 }
1885
1886 /*
1887  * Debug-aware fast / slowpath lock, trylock and unlock operations.
1888  *
1889  * The atomic acquire/release ops are compiled away when either the
1890  * architecture does not support cmpxchg or when debugging is enabled.
1891  */
1892 static inline int
1893 rt_mutex_fastlock(struct rt_mutex *lock, int state,
1894                   struct ww_acquire_ctx *ww_ctx,
1895                   int (*slowfn)(struct rt_mutex *lock, int state,
1896                                 struct hrtimer_sleeper *timeout,
1897                                 enum rtmutex_chainwalk chwalk,
1898                                 struct ww_acquire_ctx *ww_ctx))
1899 {
1900         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
1901                 rt_mutex_deadlock_account_lock(lock, current);
1902                 return 0;
1903         } else
1904                 return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
1905                               ww_ctx);
1906 }
1907
1908 static inline int
1909 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
1910                         struct hrtimer_sleeper *timeout,
1911                         enum rtmutex_chainwalk chwalk,
1912                         struct ww_acquire_ctx *ww_ctx,
1913                         int (*slowfn)(struct rt_mutex *lock, int state,
1914                                       struct hrtimer_sleeper *timeout,
1915                                       enum rtmutex_chainwalk chwalk,
1916                                       struct ww_acquire_ctx *ww_ctx))
1917 {
1918         if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
1919             likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
1920                 rt_mutex_deadlock_account_lock(lock, current);
1921                 return 0;
1922         } else
1923                 return slowfn(lock, state, timeout, chwalk, ww_ctx);
1924 }
1925
1926 static inline int
1927 rt_mutex_fasttrylock(struct rt_mutex *lock,
1928                      int (*slowfn)(struct rt_mutex *lock))
1929 {
1930         if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
1931                 rt_mutex_deadlock_account_lock(lock, current);
1932                 return 1;
1933         }
1934         return slowfn(lock);
1935 }
1936
1937 static inline void
1938 rt_mutex_fastunlock(struct rt_mutex *lock,
1939                     bool (*slowfn)(struct rt_mutex *lock,
1940                                    struct wake_q_head *wqh,
1941                                    struct wake_q_head *wq_sleeper))
1942 {
1943         WAKE_Q(wake_q);
1944         WAKE_Q(wake_sleeper_q);
1945
1946         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
1947                 rt_mutex_deadlock_account_unlock(current);
1948
1949         } else {
1950                 bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q);
1951
1952                 wake_up_q(&wake_q);
1953                 wake_up_q_sleeper(&wake_sleeper_q);
1954
1955                 /* Undo pi boosting if necessary: */
1956                 if (deboost)
1957                         rt_mutex_adjust_prio(current);
1958         }
1959 }
1960
1961 /**
1962  * rt_mutex_lock - lock a rt_mutex
1963  *
1964  * @lock: the rt_mutex to be locked
1965  */
1966 void __sched rt_mutex_lock(struct rt_mutex *lock)
1967 {
1968         might_sleep();
1969
1970         rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
1971 }
1972 EXPORT_SYMBOL_GPL(rt_mutex_lock);
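
/*
 * Illustrative usage sketch (compiled out): the basic blocking lock/unlock
 * pattern built on rt_mutex_lock()/rt_mutex_unlock(). The names demo_counter
 * and demo_inc() are hypothetical.
 */
#if 0
static int demo_counter;

static void demo_inc(struct rt_mutex *demo_lock)
{
	rt_mutex_lock(demo_lock);	/* may sleep; a contended owner gets PI boosted */
	demo_counter++;
	rt_mutex_unlock(demo_lock);	/* wakes the top waiter, deboosts if required */
}
#endif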
1973
1974 /**
1975  * rt_mutex_lock_interruptible - lock a rt_mutex interruptibly
1976  *
1977  * @lock:               the rt_mutex to be locked
1978  *
1979  * Returns:
1980  *  0           on success
1981  * -EINTR       when interrupted by a signal
1982  */
1983 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
1984 {
1985         might_sleep();
1986
1987         return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
1988 }
1989 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
1990
1991 /*
1992  * Futex variant with full deadlock detection.
1993  */
1994 int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
1995                               struct hrtimer_sleeper *timeout)
1996 {
1997         might_sleep();
1998
1999         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
2000                                        RT_MUTEX_FULL_CHAINWALK, NULL,
2001                                        rt_mutex_slowlock);
2002 }
2003
2004 /**
2005  * rt_mutex_lock_killable - lock a rt_mutex killable
2006  *
2007  * @lock:              the rt_mutex to be locked
2009  *
2010  * Returns:
2011  *  0          on success
2012  * -EINTR      when interrupted by a fatal signal
2014  */
2015 int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
2016 {
2017         might_sleep();
2018
2019         return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
2020 }
2021 EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
2022
2023 /**
2024  * rt_mutex_timed_lock - lock a rt_mutex interruptibly;
2025  *                      the timeout structure is provided
2026  *                      by the caller
2027  *
2028  * @lock:               the rt_mutex to be locked
2029  * @timeout:            timeout structure or NULL (no timeout)
2030  *
2031  * Returns:
2032  *  0           on success
2033  * -EINTR       when interrupted by a signal
2034  * -ETIMEDOUT   when the timeout expired
2035  */
2036 int
2037 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
2038 {
2039         might_sleep();
2040
2041         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
2042                                        RT_MUTEX_MIN_CHAINWALK,
2043                                        NULL,
2044                                        rt_mutex_slowlock);
2045 }
2046 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
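
/*
 * Illustrative sketch (compiled out) of a timed acquisition: the caller
 * provides a pre-initialized, not yet started hrtimer_sleeper and the slow
 * path arms it. demo_timed_get() and its parameters are hypothetical; the
 * hrtimer setup mirrors the pattern used by other callers of this API.
 */
#if 0
static int demo_timed_get(struct rt_mutex *demo_lock, u64 timeout_ns)
{
	struct hrtimer_sleeper to;
	int ret;

	hrtimer_init_on_stack(&to.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	hrtimer_init_sleeper(&to, current);
	hrtimer_set_expires(&to.timer, ktime_add_ns(ktime_get(), timeout_ns));

	/* 0 on success, -EINTR on a signal, -ETIMEDOUT when the timer fired */
	ret = rt_mutex_timed_lock(demo_lock, &to);

	destroy_hrtimer_on_stack(&to.timer);
	return ret;
}
#endif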
2047
2048 /**
2049  * rt_mutex_trylock - try to lock a rt_mutex
2050  *
2051  * @lock:       the rt_mutex to be locked
2052  *
2053  * This function can only be called in thread context. It's safe to
2054  * call it from atomic regions, but not from hard interrupt context.
2055  * Without PREEMPT_RT_FULL it must not be called from softirq context either.
2056  *
2057  * Returns 1 on success and 0 on contention
2058  */
2059 int __sched rt_mutex_trylock(struct rt_mutex *lock)
2060 {
2061 #ifdef CONFIG_PREEMPT_RT_FULL
2062         if (WARN_ON(in_irq() || in_nmi()))
2063 #else
2064         if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
2065 #endif
2066                 return 0;
2067
2068         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
2069 }
2070 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
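
/*
 * Illustrative sketch (compiled out): opportunistic acquisition with a
 * blocking fallback. demo_try_then_lock() is a hypothetical name.
 */
#if 0
static void demo_try_then_lock(struct rt_mutex *lock)
{
	if (!rt_mutex_trylock(lock)) {
		/* Contended: fall back to the sleeping slow path. */
		rt_mutex_lock(lock);
	}
	/* ... critical section ... */
	rt_mutex_unlock(lock);
}
#endif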
2071
2072 /**
2073  * rt_mutex_unlock - unlock a rt_mutex
2074  *
2075  * @lock: the rt_mutex to be unlocked
2076  */
2077 void __sched rt_mutex_unlock(struct rt_mutex *lock)
2078 {
2079         rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
2080 }
2081 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
2082
2083 /**
2084  * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
2085  * @lock: the rt_mutex to be unlocked
2086  *
2087  * Returns: true/false indicating whether priority adjustment is
2088  * required or not.
2089  */
2090 bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
2091                                    struct wake_q_head *wqh,
2092                                    struct wake_q_head *wq_sleeper)
2093 {
2094         if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
2095                 rt_mutex_deadlock_account_unlock(current);
2096                 return false;
2097         }
2098         return rt_mutex_slowunlock(lock, wqh, wq_sleeper);
2099 }
2100
2101 /**
2102  * rt_mutex_destroy - mark a mutex unusable
2103  * @lock: the mutex to be destroyed
2104  *
2105  * This function marks the mutex uninitialized, and any subsequent
2106  * use of the mutex is forbidden. The mutex must not be locked when
2107  * this function is called.
2108  */
2109 void rt_mutex_destroy(struct rt_mutex *lock)
2110 {
2111         WARN_ON(rt_mutex_is_locked(lock));
2112 #ifdef CONFIG_DEBUG_RT_MUTEXES
2113         lock->magic = NULL;
2114 #endif
2115 }
2116
2117 EXPORT_SYMBOL_GPL(rt_mutex_destroy);
2118
2119 /**
2120  * __rt_mutex_init - initialize the rt lock
2121  *
2122  * @lock: the rt lock to be initialized
2123  *
2124  * Initialize the rt lock to unlocked state.
2125  *
2126  * Initializing a locked rt lock is not allowed.
2127  */
2128 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
2129 {
2130         lock->owner = NULL;
2131         lock->waiters = RB_ROOT;
2132         lock->waiters_leftmost = NULL;
2133
2134         debug_rt_mutex_init(lock, name);
2135 }
2136 EXPORT_SYMBOL(__rt_mutex_init);
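
/*
 * Illustrative sketch (compiled out) of the two common initialization forms:
 * static definition via DEFINE_RT_MUTEX() and runtime initialization via
 * rt_mutex_init(), which wraps __rt_mutex_init(). demo_static_lock and
 * struct demo_obj are hypothetical.
 */
#if 0
static DEFINE_RT_MUTEX(demo_static_lock);

struct demo_obj {
	struct rt_mutex lock;
};

static void demo_obj_setup(struct demo_obj *obj)
{
	/* Wraps __rt_mutex_init(); the debug name depends on CONFIG_DEBUG_RT_MUTEXES */
	rt_mutex_init(&obj->lock);
}
#endif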
2137
2138 /**
2139  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
2140  *                              proxy owner
2141  *
2142  * @lock:       the rt_mutex to be locked
2143  * @proxy_owner: the task to set as owner
2144  *
2145  * No locking. The caller has to do the serializing itself.
2146  * Special API call for PI-futex support
2147  */
2148 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
2149                                 struct task_struct *proxy_owner)
2150 {
2151         rt_mutex_init(lock);
2152         debug_rt_mutex_proxy_lock(lock, proxy_owner);
2153         rt_mutex_set_owner(lock, proxy_owner);
2154         rt_mutex_deadlock_account_lock(lock, proxy_owner);
2155 }
2156
2157 /**
2158  * rt_mutex_proxy_unlock - release a lock on behalf of owner
2159  *
2160  * @lock:       the rt_mutex to be unlocked
2161  *
2162  * No locking. The caller has to do the serializing itself.
2163  * Special API call for PI-futex support
2164  */
2165 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
2166                            struct task_struct *proxy_owner)
2167 {
2168         debug_rt_mutex_proxy_unlock(lock);
2169         rt_mutex_set_owner(lock, NULL);
2170         rt_mutex_deadlock_account_unlock(proxy_owner);
2171 }
2172
2173 /**
2174  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
2175  * @lock:               the rt_mutex to take
2176  * @waiter:             the pre-initialized rt_mutex_waiter
2177  * @task:               the task to prepare
2178  *
2179  * Returns:
2180  *  0 - task blocked on lock
2181  *  1 - acquired the lock for task, caller should wake it up
2182  * <0 - error
2183  *
2184  * Special API call for FUTEX_REQUEUE_PI support.
2185  */
2186 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
2187                               struct rt_mutex_waiter *waiter,
2188                               struct task_struct *task)
2189 {
2190         int ret;
2191
2192         raw_spin_lock_irq(&lock->wait_lock);
2193
2194         if (try_to_take_rt_mutex(lock, task, NULL)) {
2195                 raw_spin_unlock_irq(&lock->wait_lock);
2196                 return 1;
2197         }
2198
2199 #ifdef CONFIG_PREEMPT_RT_FULL
2200         /*
2201          * In PREEMPT_RT there's an added race.
2202          * If the task that we are about to requeue times out, it can set
2203          * its pi_blocked_on to PI_WAKEUP_INPROGRESS. This tells the requeue
2204          * code to skip this task. But right after the task sets
2205          * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
2206          * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
2207          * This will replace the PI_WAKEUP_INPROGRESS with the actual
2208          * lock that it blocks on. We *must not* place this task
2209          * on this proxy lock in that case.
2210          *
2211          * To prevent this race, we first take the task's pi_lock
2212          * and check if it has updated its pi_blocked_on. If it has,
2213          * we assume that it woke up and we return -EAGAIN.
2214          * Otherwise, we set the task's pi_blocked_on to
2215          * PI_REQUEUE_INPROGRESS, so that if the task is waking up
2216          * it will know that we are in the process of requeuing it.
2217          */
2218         raw_spin_lock(&task->pi_lock);
2219         if (task->pi_blocked_on) {
2220                 raw_spin_unlock(&task->pi_lock);
2221                 raw_spin_unlock_irq(&lock->wait_lock);
2222                 return -EAGAIN;
2223         }
2224         task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
2225         raw_spin_unlock(&task->pi_lock);
2226 #endif
2227
2228         /* We enforce deadlock detection for futexes */
2229         ret = task_blocks_on_rt_mutex(lock, waiter, task,
2230                                       RT_MUTEX_FULL_CHAINWALK);
2231
2232         if (ret && !rt_mutex_owner(lock)) {
2233                 /*
2234                  * Reset the return value. We might have
2235                  * returned with -EDEADLK and the owner
2236                  * released the lock while we were walking the
2237                  * pi chain.  Let the waiter sort it out.
2238                  */
2239                 ret = 0;
2240         }
2241
2242         if (ret && rt_mutex_has_waiters(lock))
2243                 remove_waiter(lock, waiter);
2244
2245         raw_spin_unlock_irq(&lock->wait_lock);
2246
2247         debug_rt_mutex_print_deadlock(waiter);
2248
2249         return ret;
2250 }
2251
2252 /**
2253  * rt_mutex_next_owner - return the next owner of the lock
2254  *
2255  * @lock: the rt lock query
2256  *
2257  * Returns the next owner of the lock or NULL
2258  *
2259  * Caller has to serialize against other accessors to the lock
2260  * itself.
2261  *
2262  * Special API call for PI-futex support
2263  */
2264 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
2265 {
2266         if (!rt_mutex_has_waiters(lock))
2267                 return NULL;
2268
2269         return rt_mutex_top_waiter(lock)->task;
2270 }
2271
2272 /**
2273  * rt_mutex_finish_proxy_lock() - Complete lock acquisition
2274  * @lock:               the rt_mutex we were woken on
2275  * @to:                 the timeout, NULL if none. The hrtimer should already have
2276  *                      been started.
2277  * @waiter:             the pre-initialized rt_mutex_waiter
2278  *
2279  * Complete the lock acquisition started on our behalf by another thread.
2280  *
2281  * Returns:
2282  *  0 - success
2283  * <0 - error, one of -EINTR, -ETIMEDOUT
2284  *
2285  * Special API call for PI-futex requeue support
2286  */
2287 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
2288                                struct hrtimer_sleeper *to,
2289                                struct rt_mutex_waiter *waiter)
2290 {
2291         int ret;
2292
2293         raw_spin_lock_irq(&lock->wait_lock);
2294
2295         set_current_state(TASK_INTERRUPTIBLE);
2296
2297         /* sleep on the mutex */
2298         ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
2299
2300         if (unlikely(ret))
2301                 remove_waiter(lock, waiter);
2302
2303         /*
2304          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
2305          * have to fix that up.
2306          */
2307         fixup_rt_mutex_waiters(lock);
2308
2309         raw_spin_unlock_irq(&lock->wait_lock);
2310
2311         return ret;
2312 }
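
/*
 * Illustrative sketch (compiled out) of the two-phase proxy protocol above:
 * one context (the futex requeue path in kernel/futex.c) queues @task as a
 * waiter, and the task itself later completes the acquisition.
 * demo_requeue_side() and demo_waiter_side() are hypothetical names.
 */
#if 0
static int demo_requeue_side(struct rt_mutex *lock,
			     struct rt_mutex_waiter *waiter,
			     struct task_struct *task)
{
	rt_mutex_init_waiter(waiter, false);
	/* 1: lock taken for @task (wake it up), 0: @task queued, <0: error */
	return rt_mutex_start_proxy_lock(lock, waiter, task);
}

static int demo_waiter_side(struct rt_mutex *lock,
			    struct hrtimer_sleeper *to,
			    struct rt_mutex_waiter *waiter)
{
	/* Runs in the woken task's context; 0 on success, -EINTR/-ETIMEDOUT otherwise */
	return rt_mutex_finish_proxy_lock(lock, to, waiter);
}
#endif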
2313
2314 static inline int
2315 ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
2316 {
2317 #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
2318         unsigned tmp;
2319
2320         if (ctx->deadlock_inject_countdown-- == 0) {
2321                 tmp = ctx->deadlock_inject_interval;
2322                 if (tmp > UINT_MAX/4)
2323                         tmp = UINT_MAX;
2324                 else
2325                         tmp = tmp*2 + tmp + tmp/2; /* grow the interval by ~3.5x */
2326
2327                 ctx->deadlock_inject_interval = tmp;
2328                 ctx->deadlock_inject_countdown = tmp;
2329                 ctx->contending_lock = lock;
2330
2331                 ww_mutex_unlock(lock);
2332
2333                 return -EDEADLK;
2334         }
2335 #endif
2336
2337         return 0;
2338 }
2339
2340 #ifdef CONFIG_PREEMPT_RT_FULL
2341 int __sched
2342 __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
2343 {
2344         int ret;
2345
2346         might_sleep();
2347
2348         mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
2349         ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
2350         if (ret)
2351                 mutex_release(&lock->base.dep_map, 1, _RET_IP_);
2352         else if (!ret && ww_ctx->acquired > 1)
2353                 return ww_mutex_deadlock_injection(lock, ww_ctx);
2354
2355         return ret;
2356 }
2357 EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
2358
2359 int __sched
2360 __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
2361 {
2362         int ret;
2363
2364         might_sleep();
2365
2366         mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
2367         ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
2368         if (ret)
2369                 mutex_release(&lock->base.dep_map, 1, _RET_IP_);
2370         else if (!ret && ww_ctx->acquired > 1)
2371                 return ww_mutex_deadlock_injection(lock, ww_ctx);
2372
2373         return ret;
2374 }
2375 EXPORT_SYMBOL_GPL(__ww_mutex_lock);
2376
2377 void __sched ww_mutex_unlock(struct ww_mutex *lock)
2378 {
2379         int nest = !!lock->ctx;
2380
2381         /*
2382          * The unlocking fastpath is the 0->1 transition from 'locked'
2383          * into 'unlocked' state:
2384          */
2385         if (nest) {
2386 #ifdef CONFIG_DEBUG_MUTEXES
2387                 DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
2388 #endif
2389                 if (lock->ctx->acquired > 0)
2390                         lock->ctx->acquired--;
2391                 lock->ctx = NULL;
2392         }
2393
2394         mutex_release(&lock->base.dep_map, nest, _RET_IP_);
2395         rt_mutex_unlock(&lock->base.lock);
2396 }
2397 EXPORT_SYMBOL(ww_mutex_unlock);
2398 #endif
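
/*
 * Illustrative sketch (compiled out) of the wound/wait acquire protocol the
 * PREEMPT_RT_FULL ww_mutex code above serves: on -EDEADLK the caller backs
 * off by dropping everything it holds. demo_ww_class and demo_lock_pair()
 * are hypothetical; real users would typically also retry after backing off.
 */
#if 0
static DEFINE_WW_CLASS(demo_ww_class);

static int demo_lock_pair(struct ww_mutex *a, struct ww_mutex *b)
{
	struct ww_acquire_ctx ctx;
	int ret;

	ww_acquire_init(&ctx, &demo_ww_class);

	ret = ww_mutex_lock(a, &ctx);
	if (ret)
		goto out;

	ret = ww_mutex_lock(b, &ctx);
	if (ret) {
		/* -EDEADLK: we lost the wait/wound arbitration, back off. */
		ww_mutex_unlock(a);
		goto out;
	}

	ww_acquire_done(&ctx);
	/* ... work with both locks held ... */
	ww_mutex_unlock(b);
	ww_mutex_unlock(a);
out:
	ww_acquire_fini(&ctx);
	return ret;
}
#endif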