Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / Documentation / RCU / rcuref.txt
1 Reference-count design for elements of lists/arrays protected by RCU.
2
3
4 Please note that the percpu-ref feature is likely your first
5 stop if you need to combine reference counts and RCU.  Please see
6 include/linux/percpu-refcount.h for more information.  However, in
7 those unusual cases where percpu-ref would consume too much memory,
8 please read on.
9
10 ------------------------------------------------------------------------
11
12 Reference counting on elements of lists which are protected by traditional
13 reader/writer spinlocks or semaphores is straightforward:
14
15 1.                              2.
16 add()                           search_and_reference()
17 {                               {
18     alloc_object                    read_lock(&list_lock);
19     ...                             search_for_element
20     atomic_set(&el->rc, 1);         atomic_inc(&el->rc);
21     write_lock(&list_lock);          ...
22     add_element                     read_unlock(&list_lock);
23     ...                             ...
24     write_unlock(&list_lock);   }
25 }
26
27 3.                                      4.
28 release_referenced()                    delete()
29 {                                       {
30     ...                                     write_lock(&list_lock);
31     if (atomic_dec_and_test(&el->rc))       ...
32         kfree(el);                          remove_element
33     ...                                     write_unlock(&list_lock);
34 }                                           ...
35                                             if (atomic_dec_and_test(&el->rc))
36                                                 kfree(el);
37                                             ...
38                                         }
39
40 If this list/array is made lock free using RCU, that is, by changing the
41 write_lock() in add() and delete() to spin_lock() and changing read_lock()
42 in search_and_reference() to rcu_read_lock(), the atomic_inc() in
43 search_and_reference() could potentially acquire a reference to an element
44 that has already been deleted from the list/array.  Use atomic_inc_not_zero()
45 in this scenario as follows:
46
47 1.                                      2.
48 add()                                   search_and_reference()
49 {                                       {
50     alloc_object                            rcu_read_lock();
51     ...                                     search_for_element
52     atomic_set(&el->rc, 1);                 if (!atomic_inc_not_zero(&el->rc)) {
53     spin_lock(&list_lock);                      rcu_read_unlock();
54                                                 return FAIL;
55     add_element                             }
56     ...                                     ...
57     spin_unlock(&list_lock);                rcu_read_unlock();
58 }                                       }
59 3.                                      4.
60 release_referenced()                    delete()
61 {                                       {
62     ...                                     spin_lock(&list_lock);
63     if (atomic_dec_and_test(&el->rc))       ...
64         call_rcu(&el->head, el_free);       remove_element
65     ...                                     spin_unlock(&list_lock);
66 }                                           ...
67                                             if (atomic_dec_and_test(&el->rc))
68                                                 call_rcu(&el->head, el_free);
69                                             ...
70                                         }
71
72 Sometimes, a reference to the element needs to be obtained in the
73 update (write) stream.  In such cases, atomic_inc_not_zero() might be
74 overkill, since we hold the update-side spinlock.  One might instead
75 use atomic_inc() in such cases.
76
77 It is not always convenient to deal with "FAIL" in the
78 search_and_reference() code path.  In such cases, the
79 atomic_dec_and_test() may be moved from delete() to el_free()
80 as follows:
81
82 1.                                      2.
83 add()                                   search_and_reference()
84 {                                       {
85     alloc_object                            rcu_read_lock();
86     ...                                     search_for_element
87     atomic_set(&el->rc, 1);                 atomic_inc(&el->rc);
88     spin_lock(&list_lock);                  ...
89
90     add_element                             rcu_read_unlock();
91     ...                                 }
92     spin_unlock(&list_lock);            4.
93 }                                       delete()
94 3.                                      {
95 release_referenced()                        spin_lock(&list_lock);
96 {                                           ...
97     ...                                     remove_element
98     if (atomic_dec_and_test(&el->rc))       spin_unlock(&list_lock);
99         kfree(el);                          ...
100     ...                                     call_rcu(&el->head, el_free);
101 }                                           ...
102 5.                                      }
103 void el_free(struct rcu_head *rhp)
104 {
105     release_referenced();
106 }
107
108 The key point is that the initial reference added by add() is not removed
109 until after a grace period has elapsed following removal.  This means that,
110 by the time that reference is dropped, search_and_reference() can no longer
111 find this element, so the value of el->rc cannot increase.  Thus, once it
112 reaches zero, there are no readers that can or ever will be able to reference
113 the element.  The element can therefore safely be freed.  This in turn
114 guarantees that if any reader finds the element, that reader may safely
115 acquire a reference without checking the value of the reference counter.
116
117 In cases where delete() can sleep, synchronize_rcu() can be called from
118 delete(), so that el_free() can be subsumed into delete() as follows:
119
120 4.
121 delete()
122 {
123     spin_lock(&list_lock);
124     ...
125     remove_element
126     spin_unlock(&list_lock);
127     ...
128     synchronize_rcu();
129     if (atomic_dec_and_test(&el->rc))
130         kfree(el);
131     ...
132 }