kernel/fs/btrfs/qgroup.c

/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"

/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
        u64 qgroupid;

        /*
         * state
         */
        u64 rfer;       /* referenced */
        u64 rfer_cmpr;  /* referenced compressed */
        u64 excl;       /* exclusive */
        u64 excl_cmpr;  /* exclusive compressed */

        /*
         * limits
         */
        u64 lim_flags;  /* which limits are set */
        u64 max_rfer;
        u64 max_excl;
        u64 rsv_rfer;
        u64 rsv_excl;

        /*
         * reservation tracking
         */
        u64 reserved;

        /*
         * lists
         */
        struct list_head groups;  /* groups this group is member of */
        struct list_head members; /* groups that are members of this group */
        struct list_head dirty;   /* dirty groups */
        struct rb_node node;      /* tree of qgroups */

        /*
         * temp variables for accounting operations
         */
        u64 old_refcnt;
        u64 new_refcnt;
};

/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
        struct list_head next_group;
        struct list_head next_member;
        struct btrfs_qgroup *group;
        struct btrfs_qgroup *member;
};

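/*
 * ulist nodes carry a u64 aux value; the helpers below are used throughout
 * this file to stash struct btrfs_qgroup pointers in that field and to get
 * them back out, going through uintptr_t so the casts stay well-defined on
 * both 32-bit and 64-bit kernels.
 */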
#define ptr_to_u64(x) ((u64)(uintptr_t)x)
#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)

static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);

/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
                                           u64 qgroupid)
{
        struct rb_node *n = fs_info->qgroup_tree.rb_node;
        struct btrfs_qgroup *qgroup;

        while (n) {
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                if (qgroup->qgroupid < qgroupid)
                        n = n->rb_left;
                else if (qgroup->qgroupid > qgroupid)
                        n = n->rb_right;
                else
                        return qgroup;
        }
        return NULL;
}

/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
                                          u64 qgroupid)
{
        struct rb_node **p = &fs_info->qgroup_tree.rb_node;
        struct rb_node *parent = NULL;
        struct btrfs_qgroup *qgroup;

        while (*p) {
                parent = *p;
                qgroup = rb_entry(parent, struct btrfs_qgroup, node);

                if (qgroup->qgroupid < qgroupid)
                        p = &(*p)->rb_left;
                else if (qgroup->qgroupid > qgroupid)
                        p = &(*p)->rb_right;
                else
                        return qgroup;
        }

        qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
        if (!qgroup)
                return ERR_PTR(-ENOMEM);

        qgroup->qgroupid = qgroupid;
        INIT_LIST_HEAD(&qgroup->groups);
        INIT_LIST_HEAD(&qgroup->members);
        INIT_LIST_HEAD(&qgroup->dirty);

        rb_link_node(&qgroup->node, parent, p);
        rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);

        return qgroup;
}

static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
{
        struct btrfs_qgroup_list *list;

        list_del(&qgroup->dirty);
        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
                                        struct btrfs_qgroup_list, next_group);
                list_del(&list->next_group);
                list_del(&list->next_member);
                kfree(list);
        }

        while (!list_empty(&qgroup->members)) {
                list = list_first_entry(&qgroup->members,
                                        struct btrfs_qgroup_list, next_member);
                list_del(&list->next_group);
                list_del(&list->next_member);
                kfree(list);
        }
        kfree(qgroup);
}

/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
        struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

        if (!qgroup)
                return -ENOENT;

        rb_erase(&qgroup->node, &fs_info->qgroup_tree);
        __del_qgroup_rb(qgroup);
        return 0;
}

/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
                           u64 memberid, u64 parentid)
{
        struct btrfs_qgroup *member;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup_list *list;

        member = find_qgroup_rb(fs_info, memberid);
        parent = find_qgroup_rb(fs_info, parentid);
        if (!member || !parent)
                return -ENOENT;

        list = kzalloc(sizeof(*list), GFP_ATOMIC);
        if (!list)
                return -ENOMEM;

        list->group = parent;
        list->member = member;
        list_add_tail(&list->next_group, &member->groups);
        list_add_tail(&list->next_member, &parent->members);

        return 0;
}

/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
                           u64 memberid, u64 parentid)
{
        struct btrfs_qgroup *member;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup_list *list;

        member = find_qgroup_rb(fs_info, memberid);
        parent = find_qgroup_rb(fs_info, parentid);
        if (!member || !parent)
                return -ENOENT;

        list_for_each_entry(list, &member->groups, next_group) {
                if (list->group == parent) {
                        list_del(&list->next_group);
                        list_del(&list->next_member);
                        kfree(list);
                        return 0;
                }
        }
        return -ENOENT;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
                               u64 rfer, u64 excl)
{
        struct btrfs_qgroup *qgroup;

        qgroup = find_qgroup_rb(fs_info, qgroupid);
        if (!qgroup)
                return -EINVAL;
        if (qgroup->rfer != rfer || qgroup->excl != excl)
                return -EINVAL;
        return 0;
}
#endif

/*
 * The full config is read in one go, only called from open_ctree()
 * It doesn't use any locking, as at this point we're still single-threaded
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_root *quota_root = fs_info->quota_root;
        struct btrfs_path *path = NULL;
        struct extent_buffer *l;
        int slot;
        int ret = 0;
        u64 flags = 0;
        u64 rescan_progress = 0;

        if (!fs_info->quota_enabled)
                return 0;

        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
        if (!fs_info->qgroup_ulist) {
                ret = -ENOMEM;
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        /* default this to quota off, in case no status key is found */
        fs_info->qgroup_flags = 0;

        /*
         * pass 1: read status, all qgroup infos and limits
         */
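        /*
         * All per-qgroup items in the quota tree use objectid 0 and carry
         * the qgroupid in the key offset, so a single forward scan starting
         * at (0, 0, 0) visits the status item plus every info and limit
         * item in one pass.
         */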
        key.objectid = 0;
        key.type = 0;
        key.offset = 0;
        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
        if (ret)
                goto out;

        while (1) {
                struct btrfs_qgroup *qgroup;

                slot = path->slots[0];
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);

                if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
                        struct btrfs_qgroup_status_item *ptr;

                        ptr = btrfs_item_ptr(l, slot,
                                             struct btrfs_qgroup_status_item);

                        if (btrfs_qgroup_status_version(l, ptr) !=
                            BTRFS_QGROUP_STATUS_VERSION) {
                                btrfs_err(fs_info,
                                 "old qgroup version, quota disabled");
                                goto out;
                        }
                        if (btrfs_qgroup_status_generation(l, ptr) !=
                            fs_info->generation) {
                                flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                                btrfs_err(fs_info,
                                        "qgroup generation mismatch, "
                                        "marked as inconsistent");
                        }
                        fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
                                                                          ptr);
                        rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
                        goto next1;
                }

                if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
                    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
                        goto next1;

                qgroup = find_qgroup_rb(fs_info, found_key.offset);
                if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
                    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
                        btrfs_err(fs_info, "inconsistent qgroup config");
                        flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                }
                if (!qgroup) {
                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
                        if (IS_ERR(qgroup)) {
                                ret = PTR_ERR(qgroup);
                                goto out;
                        }
                }
                switch (found_key.type) {
                case BTRFS_QGROUP_INFO_KEY: {
                        struct btrfs_qgroup_info_item *ptr;

                        ptr = btrfs_item_ptr(l, slot,
                                             struct btrfs_qgroup_info_item);
                        qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
                        qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
                        qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
                        qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
                        /* generation currently unused */
                        break;
                }
                case BTRFS_QGROUP_LIMIT_KEY: {
                        struct btrfs_qgroup_limit_item *ptr;

                        ptr = btrfs_item_ptr(l, slot,
                                             struct btrfs_qgroup_limit_item);
                        qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
                        qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
                        qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
                        qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
                        qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
                        break;
                }
                }
next1:
                ret = btrfs_next_item(quota_root, path);
                if (ret < 0)
                        goto out;
                if (ret)
                        break;
        }
        btrfs_release_path(path);

        /*
         * pass 2: read all qgroup relations
         */
        key.objectid = 0;
        key.type = BTRFS_QGROUP_RELATION_KEY;
        key.offset = 0;
        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
        if (ret)
                goto out;
        while (1) {
                slot = path->slots[0];
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);

                if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
                        goto next2;

                if (found_key.objectid > found_key.offset) {
                        /* parent <- member, not needed to build config */
                        /* FIXME should we omit the key completely? */
                        goto next2;
                }

                ret = add_relation_rb(fs_info, found_key.objectid,
                                      found_key.offset);
                if (ret == -ENOENT) {
                        btrfs_warn(fs_info,
                                "orphan qgroup relation 0x%llx->0x%llx",
                                found_key.objectid, found_key.offset);
                        ret = 0;        /* ignore the error */
                }
                if (ret)
                        goto out;
next2:
                ret = btrfs_next_item(quota_root, path);
                if (ret < 0)
                        goto out;
                if (ret)
                        break;
        }
out:
        fs_info->qgroup_flags |= flags;
        if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
                fs_info->quota_enabled = 0;
                fs_info->pending_quota_state = 0;
        } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
                   ret >= 0) {
                ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
        }
        btrfs_free_path(path);

        if (ret < 0) {
                ulist_free(fs_info->qgroup_ulist);
                fs_info->qgroup_ulist = NULL;
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
        }

        return ret < 0 ? ret : 0;
}

/*
 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable().
 * The first two are single-threaded paths. For the third one, quota_root has
 * already been set to NULL with qgroup_lock held, so it is safe to clean up
 * the in-memory structures without taking qgroup_lock.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
        struct rb_node *n;
        struct btrfs_qgroup *qgroup;

        while ((n = rb_first(&fs_info->qgroup_tree))) {
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                rb_erase(n, &fs_info->qgroup_tree);
                __del_qgroup_rb(qgroup);
        }
        /*
         * we call btrfs_free_qgroup_config() when unmounting the
         * filesystem and when disabling quota, so we set qgroup_ulist
         * to be null here to avoid double free.
         */
        ulist_free(fs_info->qgroup_ulist);
        fs_info->qgroup_ulist = NULL;
}

static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *quota_root,
                                    u64 src, u64 dst)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = src;
        key.type = BTRFS_QGROUP_RELATION_KEY;
        key.offset = dst;

        ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);

        btrfs_mark_buffer_dirty(path->nodes[0]);

        btrfs_free_path(path);
        return ret;
}

static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *quota_root,
                                    u64 src, u64 dst)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = src;
        key.type = BTRFS_QGROUP_RELATION_KEY;
        key.offset = dst;

        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        ret = btrfs_del_item(trans, quota_root, path);
out:
        btrfs_free_path(path);
        return ret;
}

static int add_qgroup_item(struct btrfs_trans_handle *trans,
                           struct btrfs_root *quota_root, u64 qgroupid)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_qgroup_info_item *qgroup_info;
        struct btrfs_qgroup_limit_item *qgroup_limit;
        struct extent_buffer *leaf;
        struct btrfs_key key;

        if (btrfs_test_is_dummy_root(quota_root))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_INFO_KEY;
        key.offset = qgroupid;

        /*
         * Avoid a transaction abort by catching -EEXIST here. In that
         * case, we proceed by re-initializing the existing structure
         * on disk.
         */

        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
                                      sizeof(*qgroup_info));
        if (ret && ret != -EEXIST)
                goto out;

        leaf = path->nodes[0];
        qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
                                 struct btrfs_qgroup_info_item);
        btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
        btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
        btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
        btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
        btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

        btrfs_mark_buffer_dirty(leaf);

        btrfs_release_path(path);

        key.type = BTRFS_QGROUP_LIMIT_KEY;
        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
                                      sizeof(*qgroup_limit));
        if (ret && ret != -EEXIST)
                goto out;

        leaf = path->nodes[0];
        qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
                                  struct btrfs_qgroup_limit_item);
        btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
        btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

        btrfs_mark_buffer_dirty(leaf);

        ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}

static int del_qgroup_item(struct btrfs_trans_handle *trans,
                           struct btrfs_root *quota_root, u64 qgroupid)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_INFO_KEY;
        key.offset = qgroupid;
        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        ret = btrfs_del_item(trans, quota_root, path);
        if (ret)
                goto out;

        btrfs_release_path(path);

        key.type = BTRFS_QGROUP_LIMIT_KEY;
        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
        if (ret < 0)
                goto out;

        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        ret = btrfs_del_item(trans, quota_root, path);

out:
        btrfs_free_path(path);
        return ret;
}

static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root,
                                    struct btrfs_qgroup *qgroup)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_qgroup_limit_item *qgroup_limit;
        int ret;
        int slot;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_LIMIT_KEY;
        key.offset = qgroup->qgroupid;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret > 0)
                ret = -ENOENT;

        if (ret)
                goto out;

        l = path->nodes[0];
        slot = path->slots[0];
        qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
        btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
        btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
        btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
        btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
        btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);

        btrfs_mark_buffer_dirty(l);

out:
        btrfs_free_path(path);
        return ret;
}

static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_qgroup *qgroup)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_qgroup_info_item *qgroup_info;
        int ret;
        int slot;

        if (btrfs_test_is_dummy_root(root))
                return 0;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_INFO_KEY;
        key.offset = qgroup->qgroupid;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret > 0)
                ret = -ENOENT;

        if (ret)
                goto out;

        l = path->nodes[0];
        slot = path->slots[0];
        qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
        btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
        btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
        btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
        btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
        btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

        btrfs_mark_buffer_dirty(l);

out:
        btrfs_free_path(path);
        return ret;
}

static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
                                     struct btrfs_fs_info *fs_info,
                                     struct btrfs_root *root)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_qgroup_status_item *ptr;
        int ret;
        int slot;

        key.objectid = 0;
        key.type = BTRFS_QGROUP_STATUS_KEY;
        key.offset = 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
        if (ret > 0)
                ret = -ENOENT;

        if (ret)
                goto out;

        l = path->nodes[0];
        slot = path->slots[0];
        ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
        btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
        btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
        btrfs_set_qgroup_status_rescan(l, ptr,
                                fs_info->qgroup_rescan_progress.objectid);

        btrfs_mark_buffer_dirty(l);

out:
        btrfs_free_path(path);
        return ret;
}

/*
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
{
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *leaf = NULL;
        int ret;
        int nr = 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->leave_spinning = 1;

        key.objectid = 0;
        key.offset = 0;
        key.type = 0;

        while (1) {
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret < 0)
                        goto out;
                leaf = path->nodes[0];
                nr = btrfs_header_nritems(leaf);
                if (!nr)
                        break;
                /*
                 * delete the leaves one by one, since the whole tree
                 * is going to be deleted anyway.
                 */
                path->slots[0] = 0;
                ret = btrfs_del_items(trans, root, path, 0, nr);
                if (ret)
                        goto out;

                btrfs_release_path(path);
        }
        ret = 0;
out:
        root->fs_info->pending_quota_state = 0;
        btrfs_free_path(path);
        return ret;
}

int btrfs_quota_enable(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *quota_root;
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_path *path = NULL;
        struct btrfs_qgroup_status_item *ptr;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_qgroup *qgroup = NULL;
        int ret = 0;
        int slot;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (fs_info->quota_root) {
                fs_info->pending_quota_state = 1;
                goto out;
        }

        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
        if (!fs_info->qgroup_ulist) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * initially create the quota tree
         */
        quota_root = btrfs_create_tree(trans, fs_info,
                                       BTRFS_QUOTA_TREE_OBJECTID);
        if (IS_ERR(quota_root)) {
                ret = PTR_ERR(quota_root);
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out_free_root;
        }

        key.objectid = 0;
        key.type = BTRFS_QGROUP_STATUS_KEY;
        key.offset = 0;

        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
                                      sizeof(*ptr));
        if (ret)
                goto out_free_path;

        leaf = path->nodes[0];
        ptr = btrfs_item_ptr(leaf, path->slots[0],
                                 struct btrfs_qgroup_status_item);
        btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
        btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
        fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
                                BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
        btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

        btrfs_mark_buffer_dirty(leaf);

        key.objectid = 0;
        key.type = BTRFS_ROOT_REF_KEY;
        key.offset = 0;

        btrfs_release_path(path);
        ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
        if (ret > 0)
                goto out_add_root;
        if (ret < 0)
                goto out_free_path;

        while (1) {
                slot = path->slots[0];
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, slot);

                if (found_key.type == BTRFS_ROOT_REF_KEY) {
                        ret = add_qgroup_item(trans, quota_root,
                                              found_key.offset);
                        if (ret)
                                goto out_free_path;

                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
                        if (IS_ERR(qgroup)) {
                                ret = PTR_ERR(qgroup);
                                goto out_free_path;
                        }
                }
                ret = btrfs_next_item(tree_root, path);
                if (ret < 0)
                        goto out_free_path;
                if (ret)
                        break;
        }

out_add_root:
        btrfs_release_path(path);
        ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
        if (ret)
                goto out_free_path;

        qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
        if (IS_ERR(qgroup)) {
                ret = PTR_ERR(qgroup);
                goto out_free_path;
        }
        spin_lock(&fs_info->qgroup_lock);
        fs_info->quota_root = quota_root;
        fs_info->pending_quota_state = 1;
        spin_unlock(&fs_info->qgroup_lock);
out_free_path:
        btrfs_free_path(path);
out_free_root:
        if (ret) {
                free_extent_buffer(quota_root->node);
                free_extent_buffer(quota_root->commit_root);
                kfree(quota_root);
        }
out:
        if (ret) {
                ulist_free(fs_info->qgroup_ulist);
                fs_info->qgroup_ulist = NULL;
        }
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

int btrfs_quota_disable(struct btrfs_trans_handle *trans,
                        struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_root *quota_root;
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_root)
                goto out;
        spin_lock(&fs_info->qgroup_lock);
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
        fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
        spin_unlock(&fs_info->qgroup_lock);

        btrfs_free_qgroup_config(fs_info);

        ret = btrfs_clean_quota_tree(trans, quota_root);
        if (ret)
                goto out;

        ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
        if (ret)
                goto out;

        list_del(&quota_root->dirty_list);

        btrfs_tree_lock(quota_root->node);
        clean_tree_block(trans, tree_root->fs_info, quota_root->node);
        btrfs_tree_unlock(quota_root->node);
        btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

        free_extent_buffer(quota_root->node);
        free_extent_buffer(quota_root->commit_root);
        kfree(quota_root);
out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

static void qgroup_dirty(struct btrfs_fs_info *fs_info,
                         struct btrfs_qgroup *qgroup)
{
        if (list_empty(&qgroup->dirty))
                list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}

/*
 * The easy accounting, if we are adding/removing the only ref for an extent
 * then this qgroup and all of the parent qgroups get their reference and
 * exclusive counts adjusted.
 *
 * Caller should hold fs_info->qgroup_lock.
 */
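/*
 * Illustration (the numbers are made up): dropping the only ref to a 16KiB
 * extent held by subvolume 5 means calling this with sign == -1; qgroup 0/5
 * and every qgroup above it then lose 16KiB of both rfer and excl, since
 * the extent was exclusively owned all the way up the hierarchy.
 */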
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                                    struct ulist *tmp, u64 ref_root,
                                    u64 num_bytes, int sign)
{
        struct btrfs_qgroup *qgroup;
        struct btrfs_qgroup_list *glist;
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        int ret = 0;

        qgroup = find_qgroup_rb(fs_info, ref_root);
        if (!qgroup)
                goto out;

        qgroup->rfer += sign * num_bytes;
        qgroup->rfer_cmpr += sign * num_bytes;

        WARN_ON(sign < 0 && qgroup->excl < num_bytes);
        qgroup->excl += sign * num_bytes;
        qgroup->excl_cmpr += sign * num_bytes;
        if (sign > 0)
                qgroup->reserved -= num_bytes;

        qgroup_dirty(fs_info, qgroup);

        /* Get all of the parent groups that contain this qgroup */
        list_for_each_entry(glist, &qgroup->groups, next_group) {
                ret = ulist_add(tmp, glist->group->qgroupid,
                                ptr_to_u64(glist->group), GFP_ATOMIC);
                if (ret < 0)
                        goto out;
        }

        /* Iterate all of the parents and adjust their reference counts */
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(tmp, &uiter))) {
                qgroup = u64_to_ptr(unode->aux);
                qgroup->rfer += sign * num_bytes;
                qgroup->rfer_cmpr += sign * num_bytes;
                WARN_ON(sign < 0 && qgroup->excl < num_bytes);
                qgroup->excl += sign * num_bytes;
                if (sign > 0)
                        qgroup->reserved -= num_bytes;
                qgroup->excl_cmpr += sign * num_bytes;
                qgroup_dirty(fs_info, qgroup);

                /* Add any parents of the parents */
                list_for_each_entry(glist, &qgroup->groups, next_group) {
                        ret = ulist_add(tmp, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (ret < 0)
                                goto out;
                }
        }
        ret = 0;
out:
        return ret;
}

/*
 * Quick path for updating qgroup with only excl refs.
 *
 * In that case, just updating all the parents is enough.
 * Otherwise we need to do a full rescan.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 for a quick update, >0 when a full rescan is needed
 * (the INCONSISTENT flag gets set in that case).
 * Return < 0 for other errors.
 */
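/*
 * For example (hypothetical values): if qgroup 0/257 has rfer == excl ==
 * 1MiB, none of its data is shared, so adding or removing its membership
 * in a parent just moves that 1MiB in or out of the parent's counts; no
 * backref walking is required.
 */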
static int quick_update_accounting(struct btrfs_fs_info *fs_info,
                                   struct ulist *tmp, u64 src, u64 dst,
                                   int sign)
{
        struct btrfs_qgroup *qgroup;
        int ret = 1;
        int err = 0;

        qgroup = find_qgroup_rb(fs_info, src);
        if (!qgroup)
                goto out;
        if (qgroup->excl == qgroup->rfer) {
                ret = 0;
                err = __qgroup_excl_accounting(fs_info, tmp, dst,
                                               qgroup->excl, sign);
                if (err < 0) {
                        ret = err;
                        goto out;
                }
        }
out:
        if (ret)
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        return ret;
}

int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
        int ret = 0;

        /* Check the level of src and dst first */
        if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
                return -EINVAL;

        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                return -ENOMEM;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }
        member = find_qgroup_rb(fs_info, src);
        parent = find_qgroup_rb(fs_info, dst);
        if (!member || !parent) {
                ret = -EINVAL;
                goto out;
        }

        /* first check whether such a qgroup relation exists already */
        list_for_each_entry(list, &member->groups, next_group) {
                if (list->group == parent) {
                        ret = -EEXIST;
                        goto out;
                }
        }

        ret = add_qgroup_relation_item(trans, quota_root, src, dst);
        if (ret)
                goto out;

        ret = add_qgroup_relation_item(trans, quota_root, dst, src);
        if (ret) {
                del_qgroup_relation_item(trans, quota_root, src, dst);
                goto out;
        }

        spin_lock(&fs_info->qgroup_lock);
        ret = add_relation_rb(quota_root->fs_info, src, dst);
        if (ret < 0) {
                spin_unlock(&fs_info->qgroup_lock);
                goto out;
        }
        ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
        spin_unlock(&fs_info->qgroup_lock);
out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        ulist_free(tmp);
        return ret;
}

int __del_qgroup_relation(struct btrfs_trans_handle *trans,
                          struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
        int ret = 0;
        int err;

        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                return -ENOMEM;

        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }

        member = find_qgroup_rb(fs_info, src);
        parent = find_qgroup_rb(fs_info, dst);
        if (!member || !parent) {
                ret = -EINVAL;
                goto out;
        }

        /* first check whether such a qgroup relation exists */
        list_for_each_entry(list, &member->groups, next_group) {
                if (list->group == parent)
                        goto exist;
        }
        ret = -ENOENT;
        goto out;
exist:
        ret = del_qgroup_relation_item(trans, quota_root, src, dst);
        err = del_qgroup_relation_item(trans, quota_root, dst, src);
        if (err && !ret)
                ret = err;

        spin_lock(&fs_info->qgroup_lock);
        del_relation_rb(fs_info, src, dst);
        ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
        spin_unlock(&fs_info->qgroup_lock);
out:
        ulist_free(tmp);
        return ret;
}

int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        ret = __del_qgroup_relation(trans, fs_info, src, dst);
        mutex_unlock(&fs_info->qgroup_ioctl_lock);

        return ret;
}
1233 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1234                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1235 {
1236         struct btrfs_root *quota_root;
1237         struct btrfs_qgroup *qgroup;
1238         int ret = 0;
1239
1240         mutex_lock(&fs_info->qgroup_ioctl_lock);
1241         quota_root = fs_info->quota_root;
1242         if (!quota_root) {
1243                 ret = -EINVAL;
1244                 goto out;
1245         }
1246         qgroup = find_qgroup_rb(fs_info, qgroupid);
1247         if (qgroup) {
1248                 ret = -EEXIST;
1249                 goto out;
1250         }
1251
1252         ret = add_qgroup_item(trans, quota_root, qgroupid);
1253         if (ret)
1254                 goto out;
1255
1256         spin_lock(&fs_info->qgroup_lock);
1257         qgroup = add_qgroup_rb(fs_info, qgroupid);
1258         spin_unlock(&fs_info->qgroup_lock);
1259
1260         if (IS_ERR(qgroup))
1261                 ret = PTR_ERR(qgroup);
1262 out:
1263         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1264         return ret;
1265 }

int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                        struct btrfs_fs_info *fs_info, u64 qgroupid)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        struct btrfs_qgroup_list *list;
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }

        qgroup = find_qgroup_rb(fs_info, qgroupid);
        if (!qgroup) {
                ret = -ENOENT;
                goto out;
        } else {
                /* check if there are no children of this qgroup */
                if (!list_empty(&qgroup->members)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        ret = del_qgroup_item(trans, quota_root, qgroupid);

        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
                                        struct btrfs_qgroup_list, next_group);
                ret = __del_qgroup_relation(trans, fs_info,
                                            qgroupid,
                                            list->group->qgroupid);
                if (ret)
                        goto out;
        }

        spin_lock(&fs_info->qgroup_lock);
        del_qgroup_rb(quota_root->fs_info, qgroupid);
        spin_unlock(&fs_info->qgroup_lock);
out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info, u64 qgroupid,
                       struct btrfs_qgroup_limit *limit)
{
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        int ret = 0;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }

        qgroup = find_qgroup_rb(fs_info, qgroupid);
        if (!qgroup) {
                ret = -ENOENT;
                goto out;
        }

        spin_lock(&fs_info->qgroup_lock);
        if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
                qgroup->max_rfer = limit->max_rfer;
        if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
                qgroup->max_excl = limit->max_excl;
        if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
                qgroup->rsv_rfer = limit->rsv_rfer;
        if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
                qgroup->rsv_excl = limit->rsv_excl;
        qgroup->lim_flags |= limit->flags;

        spin_unlock(&fs_info->qgroup_lock);

        ret = update_qgroup_limit_item(trans, quota_root, qgroup);
        if (ret) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                btrfs_info(fs_info, "unable to update quota limit for %llu",
                           qgroupid);
        }

out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
}

static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
                           struct btrfs_qgroup_operation *oper2)
{
        /*
         * Ignore seq and type here, we're looking for any operation
         * at all related to this extent on that root.
         */
        if (oper1->bytenr < oper2->bytenr)
                return -1;
        if (oper1->bytenr > oper2->bytenr)
                return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
                return 1;
        return 0;
}

static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
                              struct btrfs_qgroup_operation *oper)
{
        struct rb_node *n;
        struct btrfs_qgroup_operation *cur;
        int cmp;

        spin_lock(&fs_info->qgroup_op_lock);
        n = fs_info->qgroup_op_tree.rb_node;
        while (n) {
                cur = rb_entry(n, struct btrfs_qgroup_operation, n);
                cmp = comp_oper_exist(cur, oper);
                if (cmp < 0) {
                        n = n->rb_right;
                } else if (cmp) {
                        n = n->rb_left;
                } else {
                        spin_unlock(&fs_info->qgroup_op_lock);
                        return -EEXIST;
                }
        }
        spin_unlock(&fs_info->qgroup_op_lock);
        return 0;
}

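/*
 * Full ordering for the operation tree: operations sort by
 * (bytenr, ref_root, seq, type), so every operation touching a given
 * extent for a given root clusters together in the tree, ordered by
 * the sequence number taken when the operation was recorded.
 */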
static int comp_oper(struct btrfs_qgroup_operation *oper1,
                     struct btrfs_qgroup_operation *oper2)
{
        if (oper1->bytenr < oper2->bytenr)
                return -1;
        if (oper1->bytenr > oper2->bytenr)
                return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
                return 1;
        if (oper1->seq < oper2->seq)
                return -1;
        if (oper1->seq > oper2->seq)
                return 1;
        if (oper1->type < oper2->type)
                return -1;
        if (oper1->type > oper2->type)
                return 1;
        return 0;
}

static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
                              struct btrfs_qgroup_operation *oper)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_qgroup_operation *cur;
        int cmp;

        spin_lock(&fs_info->qgroup_op_lock);
        p = &fs_info->qgroup_op_tree.rb_node;
        while (*p) {
                parent = *p;
                cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
                cmp = comp_oper(cur, oper);
                if (cmp < 0) {
                        p = &(*p)->rb_right;
                } else if (cmp) {
                        p = &(*p)->rb_left;
                } else {
                        spin_unlock(&fs_info->qgroup_op_lock);
                        return -EEXIST;
                }
        }
        rb_link_node(&oper->n, parent, p);
        rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
        spin_unlock(&fs_info->qgroup_op_lock);
        return 0;
}

/*
 * Record a quota operation for processing later on.
 * @trans: the transaction we are adding the delayed op to.
 * @fs_info: the fs_info for this fs.
 * @ref_root: the root of the reference we are acting on.
 * @bytenr: the bytenr we are acting on.
 * @num_bytes: the number of bytes in the reference.
 * @type: the type of operation this is.
 * @mod_seq: do we need to get a sequence number for looking up roots.
 *
 * We just add it to our trans qgroup_ref_list and carry on and process these
 * operations in order at some later point.  If the reference root isn't a fs
 * root then we don't bother with doing anything.
 *
 * MUST BE HOLDING THE REF LOCK.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info, u64 ref_root,
                            u64 bytenr, u64 num_bytes,
                            enum btrfs_qgroup_operation_type type, int mod_seq)
{
        struct btrfs_qgroup_operation *oper;
        int ret;

        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                return 0;

        oper = kmalloc(sizeof(*oper), GFP_NOFS);
        if (!oper)
                return -ENOMEM;

        oper->ref_root = ref_root;
        oper->bytenr = bytenr;
        oper->num_bytes = num_bytes;
        oper->type = type;
        oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
        INIT_LIST_HEAD(&oper->elem.list);
        oper->elem.seq = 0;

        trace_btrfs_qgroup_record_ref(oper);

        if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
                /*
                 * If any operation for this bytenr/ref_root combo
                 * exists, then we know it's not exclusively owned and
                 * shouldn't be queued up.
                 *
                 * This also catches the case where we have a cloned
                 * extent that gets queued up multiple times during
                 * drop snapshot.
                 */
                if (qgroup_oper_exists(fs_info, oper)) {
                        kfree(oper);
                        return 0;
                }
        }

        ret = insert_qgroup_oper(fs_info, oper);
        if (ret) {
                /* Shouldn't happen so have an assert for developers */
                ASSERT(0);
                kfree(oper);
                return ret;
        }
        list_add_tail(&oper->list, &trans->qgroup_ref_list);

        if (mod_seq)
                btrfs_get_tree_mod_seq(fs_info, &oper->elem);

        return 0;
}

static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                                  struct btrfs_qgroup_operation *oper)
{
        struct ulist *tmp;
        int sign = 0;
        int ret = 0;

        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                return -ENOMEM;

        spin_lock(&fs_info->qgroup_lock);
        if (!fs_info->quota_root)
                goto out;

        switch (oper->type) {
        case BTRFS_QGROUP_OPER_ADD_EXCL:
                sign = 1;
                break;
        case BTRFS_QGROUP_OPER_SUB_EXCL:
                sign = -1;
                break;
        default:
                ASSERT(0);
        }
        ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
                                       oper->num_bytes, sign);
out:
        spin_unlock(&fs_info->qgroup_lock);
        ulist_free(tmp);
        return ret;
}

/*
 * Walk all of the roots that pointed to our bytenr and adjust their refcnts
 * properly.
 */
1562 static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1563                                   u64 root_to_skip, struct ulist *tmp,
1564                                   struct ulist *roots, struct ulist *qgroups,
1565                                   u64 seq, int *old_roots, int rescan)
1566 {
1567         struct ulist_node *unode;
1568         struct ulist_iterator uiter;
1569         struct ulist_node *tmp_unode;
1570         struct ulist_iterator tmp_uiter;
1571         struct btrfs_qgroup *qg;
1572         int ret;
1573
1574         ULIST_ITER_INIT(&uiter);
1575         while ((unode = ulist_next(roots, &uiter))) {
1576                 /* We don't count our current root here */
1577                 if (unode->val == root_to_skip)
1578                         continue;
1579                 qg = find_qgroup_rb(fs_info, unode->val);
1580                 if (!qg)
1581                         continue;
1582                 /*
1583                  * We could have a pending removal of this same ref so we may
1584                  * not have actually found our ref root when doing
1585                  * btrfs_find_all_roots, so we need to keep track of how many
1586                  * old roots we find in case we removed ours and added a
1587                  * different one at the same time.  I don't think this could
1588                  * happen in practice but that sort of thinking leads to pain
1589                  * and suffering and to the dark side.
1590                  */
1591                 (*old_roots)++;
1592
1593                 ulist_reinit(tmp);
1594                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1595                                 GFP_ATOMIC);
1596                 if (ret < 0)
1597                         return ret;
1598                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1599                 if (ret < 0)
1600                         return ret;
1601                 ULIST_ITER_INIT(&tmp_uiter);
1602                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1603                         struct btrfs_qgroup_list *glist;
1604
1605                         qg = u64_to_ptr(tmp_unode->aux);
1606                         /*
1607                          * We use this sequence number to keep from having to
1608                          * run the whole list and 0 out the refcnt every time.
1609                          * We basically use the sequence as the known 0 count and
1610                          * then add 1 every time we see a qgroup.  This is how we
1611                          * get how many of the roots actually point up to the
1612                          * upper level qgroups in order to determine exclusive
1613                          * counts.
1614                          *
1615                          * For rescan we want to set old_refcnt to seq so our
1616                          * exclusive calculations end up correct.
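                         *
                         * Worked example (numbers assumed for illustration):
                         * say seq == 100 and two roots point at the extent.
                         * The first visit finds a stale old_refcnt below seq
                         * and sets it to 101 (seq + 1), the second bumps it
                         * to 102, so old_refcnt - seq == 2 recovers the count
                         * without ever zeroing the field.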
1617                          */
1618                         if (rescan)
1619                                 qg->old_refcnt = seq;
1620                         else if (qg->old_refcnt < seq)
1621                                 qg->old_refcnt = seq + 1;
1622                         else
1623                                 qg->old_refcnt++;
1624
1625                         if (qg->new_refcnt < seq)
1626                                 qg->new_refcnt = seq + 1;
1627                         else
1628                                 qg->new_refcnt++;
1629                         list_for_each_entry(glist, &qg->groups, next_group) {
1630                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1631                                                 ptr_to_u64(glist->group),
1632                                                 GFP_ATOMIC);
1633                                 if (ret < 0)
1634                                         return ret;
1635                                 ret = ulist_add(tmp, glist->group->qgroupid,
1636                                                 ptr_to_u64(glist->group),
1637                                                 GFP_ATOMIC);
1638                                 if (ret < 0)
1639                                         return ret;
1640                         }
1641                 }
1642         }
1643         return 0;
1644 }
1645
1646 /*
1647  * We need to walk forward in our operation tree and account for any roots that
1648  * were deleted after we made this operation.
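 *
 * An assumed scenario for illustration: roots A and B both pointed at the
 * extent, our operation for root A was queued first, and a SUB_SHARED for
 * root B was queued after it.  By the time we account our operation, B's
 * ref may already be gone from the backrefs, so btrfs_find_all_roots would
 * miss it; counting the pending removal here keeps old_roots true to this
 * operation's view of the world.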
1649  */
1650 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1651                                        struct btrfs_qgroup_operation *oper,
1652                                        struct ulist *tmp,
1653                                        struct ulist *qgroups, u64 seq,
1654                                        int *old_roots)
1655 {
1656         struct ulist_node *unode;
1657         struct ulist_iterator uiter;
1658         struct btrfs_qgroup *qg;
1659         struct btrfs_qgroup_operation *tmp_oper;
1660         struct rb_node *n;
1661         int ret;
1662
1663         ulist_reinit(tmp);
1664
1665         /*
1666          * We only walk forward in the tree since we're only interested in
1667          * removals that happened _after_ our operation.
1668          */
1669         spin_lock(&fs_info->qgroup_op_lock);
1670         n = rb_next(&oper->n);
1671         spin_unlock(&fs_info->qgroup_op_lock);
1672         if (!n)
1673                 return 0;
1674         tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1675         while (tmp_oper->bytenr == oper->bytenr) {
1676                 /*
1677                  * If it's not a removal we don't care, additions work out
1678                  * properly with our refcnt tracking.
1679                  */
1680                 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1681                     tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1682                         goto next;
1683                 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1684                 if (!qg)
1685                         goto next;
1686                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1687                                 GFP_ATOMIC);
1688                 if (ret) {
1689                         if (ret < 0)
1690                                 return ret;
1691                         /*
1692                          * We only want to increase old_roots if this qgroup is
1693                          * not already in the list of qgroups.  If it is already
1694                          * there then that means it must have been re-added or
1695                          * the delete will be discarded because we had an
1696                          * existing ref that we haven't looked up yet.  In this
1697                          * case we don't want to increase old_roots.  So if ret
1698                          * == 1 then we know that this is the first time we've
1699                          * seen this qgroup and we can bump the old_roots.
1700                          */
1701                         (*old_roots)++;
1702                         ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1703                                         GFP_ATOMIC);
1704                         if (ret < 0)
1705                                 return ret;
1706                 }
1707 next:
1708                 spin_lock(&fs_info->qgroup_op_lock);
1709                 n = rb_next(&tmp_oper->n);
1710                 spin_unlock(&fs_info->qgroup_op_lock);
1711                 if (!n)
1712                         break;
1713                 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1714         }
1715
1716         /* Ok now process the qgroups we found */
1717         ULIST_ITER_INIT(&uiter);
1718         while ((unode = ulist_next(tmp, &uiter))) {
1719                 struct btrfs_qgroup_list *glist;
1720
1721                 qg = u64_to_ptr(unode->aux);
1722                 if (qg->old_refcnt < seq)
1723                         qg->old_refcnt = seq + 1;
1724                 else
1725                         qg->old_refcnt++;
1726                 if (qg->new_refcnt < seq)
1727                         qg->new_refcnt = seq + 1;
1728                 else
1729                         qg->new_refcnt++;
1730                 list_for_each_entry(glist, &qg->groups, next_group) {
1731                         ret = ulist_add(qgroups, glist->group->qgroupid,
1732                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1733                         if (ret < 0)
1734                                 return ret;
1735                         ret = ulist_add(tmp, glist->group->qgroupid,
1736                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1737                         if (ret < 0)
1738                                 return ret;
1739                 }
1740         }
1741         return 0;
1742 }
1743
1744 /* Add refcnt for the newly added reference. */
1745 static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1746                                   struct btrfs_qgroup_operation *oper,
1747                                   struct btrfs_qgroup *qgroup,
1748                                   struct ulist *tmp, struct ulist *qgroups,
1749                                   u64 seq)
1750 {
1751         struct ulist_node *unode;
1752         struct ulist_iterator uiter;
1753         struct btrfs_qgroup *qg;
1754         int ret;
1755
1756         ulist_reinit(tmp);
1757         ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1758                         GFP_ATOMIC);
1759         if (ret < 0)
1760                 return ret;
1761         ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1762                         GFP_ATOMIC);
1763         if (ret < 0)
1764                 return ret;
1765         ULIST_ITER_INIT(&uiter);
1766         while ((unode = ulist_next(tmp, &uiter))) {
1767                 struct btrfs_qgroup_list *glist;
1768
1769                 qg = u64_to_ptr(unode->aux);
1770                 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1771                         if (qg->new_refcnt < seq)
1772                                 qg->new_refcnt = seq + 1;
1773                         else
1774                                 qg->new_refcnt++;
1775                 } else {
1776                         if (qg->old_refcnt < seq)
1777                                 qg->old_refcnt = seq + 1;
1778                         else
1779                                 qg->old_refcnt++;
1780                 }
1781                 list_for_each_entry(glist, &qg->groups, next_group) {
1782                         ret = ulist_add(tmp, glist->group->qgroupid,
1783                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1784                         if (ret < 0)
1785                                 return ret;
1786                         ret = ulist_add(qgroups, glist->group->qgroupid,
1787                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1788                         if (ret < 0)
1789                                 return ret;
1790                 }
1791         }
1792         return 0;
1793 }
1794
1795 /*
1796  * This adjusts the counters for all referenced qgroups if need be.
1797  */
1798 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1799                                   u64 root_to_skip, u64 num_bytes,
1800                                   struct ulist *qgroups, u64 seq,
1801                                   int old_roots, int new_roots, int rescan)
1802 {
1803         struct ulist_node *unode;
1804         struct ulist_iterator uiter;
1805         struct btrfs_qgroup *qg;
1806         u64 cur_new_count, cur_old_count;
1807
1808         ULIST_ITER_INIT(&uiter);
1809         while ((unode = ulist_next(qgroups, &uiter))) {
1810                 bool dirty = false;
1811
1812                 qg = u64_to_ptr(unode->aux);
1813                 /*
1814                  * Wasn't referenced before but is now, add to the reference
1815                  * counters.
1816                  */
1817                 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
1818                         qg->rfer += num_bytes;
1819                         qg->rfer_cmpr += num_bytes;
1820                         dirty = true;
1821                 }
1822
1823                 /*
1824                  * Was referenced before but isn't now, subtract from the
1825                  * reference counters.
1826                  */
1827                 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
1828                         qg->rfer -= num_bytes;
1829                         qg->rfer_cmpr -= num_bytes;
1830                         dirty = true;
1831                 }
1832
1833                 if (qg->old_refcnt < seq)
1834                         cur_old_count = 0;
1835                 else
1836                         cur_old_count = qg->old_refcnt - seq;
1837                 if (qg->new_refcnt < seq)
1838                         cur_new_count = 0;
1839                 else
1840                         cur_new_count = qg->new_refcnt - seq;
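
                /*
                 * Worked example (assumed numbers): with seq == 100,
                 * old_refcnt == 102 and new_refcnt == 101 we get
                 * cur_old_count == 2 and cur_new_count == 1, i.e. one root
                 * dropped its reference to this range.
                 */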
1841
1842                 /*
1843                  * If our refcount was the same as the roots previously but our
1844                  * new count isn't the same as the number of roots now then we
1845                  * went from having a exclusive reference on this range to not.
1846                  */
1847                 if (old_roots && cur_old_count == old_roots &&
1848                     (cur_new_count != new_roots || new_roots == 0)) {
1849                         WARN_ON(cur_new_count != new_roots && new_roots == 0);
1850                         qg->excl -= num_bytes;
1851                         qg->excl_cmpr -= num_bytes;
1852                         dirty = true;
1853                 }
1854
1855                 /*
1856                  * If we didn't reference all the roots before but now we do we
1857                  * have an exclusive reference to this range.
1858                  */
1859                 if ((!old_roots || (old_roots && cur_old_count != old_roots))
1860                     && cur_new_count == new_roots) {
1861                         qg->excl += num_bytes;
1862                         qg->excl_cmpr += num_bytes;
1863                         dirty = true;
1864                 }
1865
1866                 if (dirty)
1867                         qgroup_dirty(fs_info, qg);
1868         }
1869         return 0;
1870 }
1871
1872 /*
1873  * If we removed a data extent and there were other references for that bytenr
1874  * then we need to lookup all referenced roots to make sure we still don't
1875  * reference this bytenr.  If we do then we can just discard this operation.
1876  */
1877 static int check_existing_refs(struct btrfs_trans_handle *trans,
1878                                struct btrfs_fs_info *fs_info,
1879                                struct btrfs_qgroup_operation *oper)
1880 {
1881         struct ulist *roots = NULL;
1882         struct ulist_node *unode;
1883         struct ulist_iterator uiter;
1884         int ret = 0;
1885
1886         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1887                                    oper->elem.seq, &roots);
1888         if (ret < 0)
1889                 return ret;
1890         ret = 0;
1891
1892         ULIST_ITER_INIT(&uiter);
1893         while ((unode = ulist_next(roots, &uiter))) {
1894                 if (unode->val == oper->ref_root) {
1895                         ret = 1;
1896                         break;
1897                 }
1898         }
1899         ulist_free(roots);
1900         btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1901
1902         return ret;
1903 }
1904
1905 /*
1906  * If we share a reference across multiple roots then we may need to adjust
1907  * various qgroups referenced and exclusive counters.  The basic premise is this
1908  *
1909  * 1) We have seq to represent a 0 count.  Instead of looping through all of the
1910  * qgroups and resetting their refcount to 0 we just constantly bump this
1911  * sequence number to act as the base reference count.  This means that if
1912  * anybody is equal to or below this sequence they were never referenced.  We
1913  * jack this sequence up by the number of roots we found each time in order to
1914  * make sure we don't have any overlap.
1915  *
1916  * 2) We first search all the roots that reference the area _except_ the root
1917  * we're acting on currently.  This makes up the old_refcnt of all the qgroups
1918  * before.
1919  *
1920  * 3) We walk all of the qgroups referenced by the root we are currently acting
1921  * on, and will either adjust old_refcnt in the case of a removal or the
1922  * new_refcnt in the case of an addition.
1923  *
1924  * 4) Finally we walk all the qgroups that are referenced by this range
1925  * including the root we are acting on currently.  We will adjust the counters
1926  * based on the number of roots we had and will have after this operation.
1927  *
1928  * Take this example as an illustration
1929  *
1930  *                      [qgroup 1/0]
1931  *                   /         |          \
1932  *              [qg 0/0]   [qg 0/1]     [qg 0/2]
1933  *                 \          |            /
1934  *                [        extent           ]
1935  *
1936  * Say we are adding a reference that is covered by qg 0/0.  The first step
1937  * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
1938  * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
1939  * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
1940  * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
1941  * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
1942  * reference and thus must add the size to the referenced bytes.  Everything
1943  * else is the same so nothing else changes.
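 *
 * In concrete (assumed) numbers with seq == 10: step 2 leaves qg 0/1 and
 * 0/2 with old_refcnt 11 and qg 1/0 with old_refcnt 12 (counts of 1, 1 and
 * 2), step 3 sets qg 0/0's new_refcnt to 11 and bumps qg 1/0's to 13, and
 * step 4 sees old_refcnt <= 10 < new_refcnt for qg 0/0 and adds num_bytes
 * to its referenced counters, while qg 1/0 goes from 2 of 2 old roots to
 * 3 of 3 new roots and so keeps its exclusive count.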
1944  */
1945 static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1946                                     struct btrfs_fs_info *fs_info,
1947                                     struct btrfs_qgroup_operation *oper)
1948 {
1949         struct ulist *roots = NULL;
1950         struct ulist *qgroups, *tmp;
1951         struct btrfs_qgroup *qgroup;
1952         struct seq_list elem = SEQ_LIST_INIT(elem);
1953         u64 seq;
1954         int old_roots = 0;
1955         int new_roots = 0;
1956         int ret = 0;
1957
1958         if (oper->elem.seq) {
1959                 ret = check_existing_refs(trans, fs_info, oper);
1960                 if (ret < 0)
1961                         return ret;
1962                 if (ret)
1963                         return 0;
1964         }
1965
1966         qgroups = ulist_alloc(GFP_NOFS);
1967         if (!qgroups)
1968                 return -ENOMEM;
1969
1970         tmp = ulist_alloc(GFP_NOFS);
1971         if (!tmp) {
1972                 ulist_free(qgroups);
1973                 return -ENOMEM;
1974         }
1975
1976         btrfs_get_tree_mod_seq(fs_info, &elem);
1977         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
1978                                    &roots);
1979         btrfs_put_tree_mod_seq(fs_info, &elem);
1980         if (ret < 0) {
1981                 ulist_free(qgroups);
1982                 ulist_free(tmp);
1983                 return ret;
1984         }
1985         spin_lock(&fs_info->qgroup_lock);
1986         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1987         if (!qgroup)
1988                 goto out;
1989         seq = fs_info->qgroup_seq;
1990
1991         /*
1992          * So roots is the list of all the roots currently pointing at the
1993          * bytenr, including the ref we are adding if this is an addition, but
1994          * not if we are removing a ref.  We pass in the ref_root to skip that
1995          * root in our calculations.  We set both old_refcnt and new_refcnt
1996          * because who the hell knows what everything looked like before, and
1997          * it doesn't matter except...
1998          */
1999         ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
2000                                      seq, &old_roots, 0);
2001         if (ret < 0)
2002                 goto out;
2003
2004         /*
2005          * Now adjust the refcounts of the qgroups that care about this
2006          * reference, either the old_count in the case of removal or new_count
2007          * in the case of an addition.
2008          */
2009         ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
2010                                      seq);
2011         if (ret < 0)
2012                 goto out;
2013
2014         /*
2015          * ...in the case of removals.  If we had a removal before we got around
2016          * to processing this operation then we need to find that guy and count
2017          * his references as if they really existed so we don't end up screwing
2018          * up the exclusive counts.  Then whenever we go to process the delete
2019          * everything will be grand and we can account for whatever exclusive
2020          * changes need to be made there.  We also have to pass in old_roots so
2021          * we have an accurate count of the roots as it pertains to this
2022          * operations view of the world.
2023          */
2024         ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
2025                                           &old_roots);
2026         if (ret < 0)
2027                 goto out;
2028
2029         /*
2030          * We are adding our root, need to adjust up the number of roots,
2031          * otherwise old_roots is the number of roots we want.
2032          */
2033         if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
2034                 new_roots = old_roots + 1;
2035         } else {
2036                 new_roots = old_roots;
2037                 old_roots++;
2038         }
2039         fs_info->qgroup_seq += old_roots + 1;
2040
2042         /*
2043          * And now the magic happens, bless Arne for having a pretty elegant
2044          * solution for this.
2045          */
2046         qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
2047                                qgroups, seq, old_roots, new_roots, 0);
2048 out:
2049         spin_unlock(&fs_info->qgroup_lock);
2050         ulist_free(qgroups);
2051         ulist_free(roots);
2052         ulist_free(tmp);
2053         return ret;
2054 }
2055
2056 /*
2057  * Process a reference to a shared subtree. This type of operation is
2058  * queued during snapshot removal when we encounter extents which are
2059  * shared between more than one root.
2060  */
2061 static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
2062                                      struct btrfs_fs_info *fs_info,
2063                                      struct btrfs_qgroup_operation *oper)
2064 {
2065         struct ulist *roots = NULL;
2066         struct ulist_node *unode;
2067         struct ulist_iterator uiter;
2068         struct btrfs_qgroup_list *glist;
2069         struct ulist *parents;
2070         int ret = 0;
2071         int err;
2072         struct btrfs_qgroup *qg;
2073         u64 root_obj = 0;
2074         struct seq_list elem = SEQ_LIST_INIT(elem);
2075
2076         parents = ulist_alloc(GFP_NOFS);
2077         if (!parents)
2078                 return -ENOMEM;
2079
2080         btrfs_get_tree_mod_seq(fs_info, &elem);
2081         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
2082                                    elem.seq, &roots);
2083         btrfs_put_tree_mod_seq(fs_info, &elem);
2084         if (ret < 0)
2085                 goto out;
2086
2087         if (roots->nnodes != 1)
2088                 goto out;
2089
2090         ULIST_ITER_INIT(&uiter);
2091         unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
2092         /*
2093          * If we find our ref root then that means all refs
2094          * this extent has to the root have not yet been
2095          * deleted. In that case, we do nothing and let the
2096          * last ref for this bytenr drive our update.
2097          *
2098          * This can happen for example if an extent is
2099          * referenced multiple times in a snapshot (clone,
2100          * etc). If we are in the middle of snapshot removal,
2101          * queued updates for such an extent will find the
2102          * root if we have not yet finished removing the
2103          * snapshot.
2104          */
2105         if (unode->val == oper->ref_root)
2106                 goto out;
2107
2108         root_obj = unode->val;
2109         BUG_ON(!root_obj);
2110
2111         spin_lock(&fs_info->qgroup_lock);
2112         qg = find_qgroup_rb(fs_info, root_obj);
2113         if (!qg)
2114                 goto out_unlock;
2115
2116         qg->excl += oper->num_bytes;
2117         qg->excl_cmpr += oper->num_bytes;
2118         qgroup_dirty(fs_info, qg);
2119
2120         /*
2121          * Adjust counts for parent groups. First we find all
2122          * parents, then in the 2nd loop we do the adjustment
2123          * while adding parents of the parents to our ulist.
2124          */
2125         list_for_each_entry(glist, &qg->groups, next_group) {
2126                 err = ulist_add(parents, glist->group->qgroupid,
2127                                 ptr_to_u64(glist->group), GFP_ATOMIC);
2128                 if (err < 0) {
2129                         ret = err;
2130                         goto out_unlock;
2131                 }
2132         }
2133
2134         ULIST_ITER_INIT(&uiter);
2135         while ((unode = ulist_next(parents, &uiter))) {
2136                 qg = u64_to_ptr(unode->aux);
2137                 qg->excl += oper->num_bytes;
2138                 qg->excl_cmpr += oper->num_bytes;
2139                 qgroup_dirty(fs_info, qg);
2140
2141                 /* Add any parents of the parents */
2142                 list_for_each_entry(glist, &qg->groups, next_group) {
2143                         err = ulist_add(parents, glist->group->qgroupid,
2144                                         ptr_to_u64(glist->group), GFP_ATOMIC);
2145                         if (err < 0) {
2146                                 ret = err;
2147                                 goto out_unlock;
2148                         }
2149                 }
2150         }
2151
2152 out_unlock:
2153         spin_unlock(&fs_info->qgroup_lock);
2154
2155 out:
2156         ulist_free(roots);
2157         ulist_free(parents);
2158         return ret;
2159 }
2160
2161 /*
2162  * btrfs_qgroup_account is called for every queued operation, i.e. for every
2163  * ref that is added to or deleted from the fs. First, all roots referencing
2164  * the extent are searched, and then the space is accounted to the affected
2165  * qgroups accordingly, dispatched by operation type to the helpers above.
2166  */
2167 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
2168                                 struct btrfs_fs_info *fs_info,
2169                                 struct btrfs_qgroup_operation *oper)
2170 {
2171         int ret = 0;
2172
2173         if (!fs_info->quota_enabled)
2174                 return 0;
2175
2176         BUG_ON(!fs_info->quota_root);
2177
2178         mutex_lock(&fs_info->qgroup_rescan_lock);
2179         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2180                 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
2181                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2182                         return 0;
2183                 }
2184         }
2185         mutex_unlock(&fs_info->qgroup_rescan_lock);
2186
2187         ASSERT(is_fstree(oper->ref_root));
2188
2189         trace_btrfs_qgroup_account(oper);
2190
2191         switch (oper->type) {
2192         case BTRFS_QGROUP_OPER_ADD_EXCL:
2193         case BTRFS_QGROUP_OPER_SUB_EXCL:
2194                 ret = qgroup_excl_accounting(fs_info, oper);
2195                 break;
2196         case BTRFS_QGROUP_OPER_ADD_SHARED:
2197         case BTRFS_QGROUP_OPER_SUB_SHARED:
2198                 ret = qgroup_shared_accounting(trans, fs_info, oper);
2199                 break;
2200         case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2201                 ret = qgroup_subtree_accounting(trans, fs_info, oper);
2202                 break;
2203         default:
2204                 ASSERT(0);
2205         }
2206         return ret;
2207 }
2208
2209 /*
2210  * Needs to be called every time we run delayed refs, even if there is an
2211  * error, in order to clean up outstanding operations.
2212  */
2213 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
2214                                     struct btrfs_fs_info *fs_info)
2215 {
2216         struct btrfs_qgroup_operation *oper;
2217         int ret = 0;
2218
2219         while (!list_empty(&trans->qgroup_ref_list)) {
2220                 oper = list_first_entry(&trans->qgroup_ref_list,
2221                                         struct btrfs_qgroup_operation, list);
2222                 list_del_init(&oper->list);
2223                 if (!ret || !trans->aborted)
2224                         ret = btrfs_qgroup_account(trans, fs_info, oper);
2225                 spin_lock(&fs_info->qgroup_op_lock);
2226                 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
2227                 spin_unlock(&fs_info->qgroup_op_lock);
2228                 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
2229                 kfree(oper);
2230         }
2231         return ret;
2232 }
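
/*
 * Usage sketch (an assumption about the call site, which lives in the
 * delayed-ref code rather than in this file): after running delayed refs
 * the caller drains the per-transaction list, e.g.:
 *
 *	ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
 *	if (ret)
 *		btrfs_abort_transaction(trans, root, ret);
 *
 * Even on error the loop above frees every queued operation, which is why
 * it must run unconditionally.
 */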
2233
2234 /*
2235  * called from commit_transaction. Writes all changed qgroups to disk.
2236  */
2237 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2238                       struct btrfs_fs_info *fs_info)
2239 {
2240         struct btrfs_root *quota_root = fs_info->quota_root;
2241         int ret = 0;
2242         int start_rescan_worker = 0;
2243
2244         if (!quota_root)
2245                 goto out;
2246
2247         if (!fs_info->quota_enabled && fs_info->pending_quota_state)
2248                 start_rescan_worker = 1;
2249
2250         fs_info->quota_enabled = fs_info->pending_quota_state;
2251
2252         spin_lock(&fs_info->qgroup_lock);
2253         while (!list_empty(&fs_info->dirty_qgroups)) {
2254                 struct btrfs_qgroup *qgroup;
2255                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
2256                                           struct btrfs_qgroup, dirty);
2257                 list_del_init(&qgroup->dirty);
2258                 spin_unlock(&fs_info->qgroup_lock);
2259                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
2260                 if (ret)
2261                         fs_info->qgroup_flags |=
2262                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2263                 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2264                 if (ret)
2265                         fs_info->qgroup_flags |=
2266                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2267                 spin_lock(&fs_info->qgroup_lock);
2268         }
2269         if (fs_info->quota_enabled)
2270                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2271         else
2272                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2273         spin_unlock(&fs_info->qgroup_lock);
2274
2275         ret = update_qgroup_status_item(trans, fs_info, quota_root);
2276         if (ret)
2277                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2278
2279         if (!ret && start_rescan_worker) {
2280                 ret = qgroup_rescan_init(fs_info, 0, 1);
2281                 if (!ret) {
2282                         qgroup_rescan_zero_tracking(fs_info);
2283                         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2284                                          &fs_info->qgroup_rescan_work);
2285                 }
2286                 ret = 0;
2287         }
2288
2289 out:
2290
2291         return ret;
2292 }
2293
2294 /*
2295  * copy the accounting information between qgroups. This is necessary when a
2296  * snapshot or a subvolume is created.
2297  */
2298 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2299                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2300                          struct btrfs_qgroup_inherit *inherit)
2301 {
2302         int ret = 0;
2303         int i;
2304         u64 *i_qgroups;
2305         struct btrfs_root *quota_root = fs_info->quota_root;
2306         struct btrfs_qgroup *srcgroup;
2307         struct btrfs_qgroup *dstgroup;
2308         u32 level_size = 0;
2309         u64 nums;
2310
2311         mutex_lock(&fs_info->qgroup_ioctl_lock);
2312         if (!fs_info->quota_enabled)
2313                 goto out;
2314
2315         if (!quota_root) {
2316                 ret = -EINVAL;
2317                 goto out;
2318         }
2319
2320         if (inherit) {
2321                 i_qgroups = (u64 *)(inherit + 1);
2322                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2323                        2 * inherit->num_excl_copies;
2324                 for (i = 0; i < nums; ++i) {
2325                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2326                         if (!srcgroup) {
2327                                 ret = -EINVAL;
2328                                 goto out;
2329                         }
2330
2331                         if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
2332                                 ret = -EINVAL;
2333                                 goto out;
2334                         }
2335                         ++i_qgroups;
2336                 }
2337         }
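
        /*
         * Layout sketch of the flexible array validated above (derived from
         * the pointer math here, not from a separate spec): inherit is
         * immediately followed by a flat u64 array of
         *
         *	[ num_qgroups qgroupids,
         *	  num_ref_copies (src, dst) pairs,
         *	  num_excl_copies (src, dst) pairs ]
         *
         * which is why nums counts the copy entries twice.
         */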
2338
2339         /*
2340          * create a tracking group for the subvol itself
2341          */
2342         ret = add_qgroup_item(trans, quota_root, objectid);
2343         if (ret)
2344                 goto out;
2345
2346         if (srcid) {
2347                 struct btrfs_root *srcroot;
2348                 struct btrfs_key srckey;
2349
2350                 srckey.objectid = srcid;
2351                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2352                 srckey.offset = (u64)-1;
2353                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2354                 if (IS_ERR(srcroot)) {
2355                         ret = PTR_ERR(srcroot);
2356                         goto out;
2357                 }
2358
2359                 rcu_read_lock();
2360                 level_size = srcroot->nodesize;
2361                 rcu_read_unlock();
2362         }
2363
2364         /*
2365          * add qgroup to all inherited groups
2366          */
2367         if (inherit) {
2368                 i_qgroups = (u64 *)(inherit + 1);
2369                 for (i = 0; i < inherit->num_qgroups; ++i) {
2370                         ret = add_qgroup_relation_item(trans, quota_root,
2371                                                        objectid, *i_qgroups);
2372                         if (ret)
2373                                 goto out;
2374                         ret = add_qgroup_relation_item(trans, quota_root,
2375                                                        *i_qgroups, objectid);
2376                         if (ret)
2377                                 goto out;
2378                         ++i_qgroups;
2379                 }
2380         }
2381
2383         spin_lock(&fs_info->qgroup_lock);
2384
2385         dstgroup = add_qgroup_rb(fs_info, objectid);
2386         if (IS_ERR(dstgroup)) {
2387                 ret = PTR_ERR(dstgroup);
2388                 goto unlock;
2389         }
2390
2391         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2392                 dstgroup->lim_flags = inherit->lim.flags;
2393                 dstgroup->max_rfer = inherit->lim.max_rfer;
2394                 dstgroup->max_excl = inherit->lim.max_excl;
2395                 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2396                 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2397
2398                 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2399                 if (ret) {
2400                         fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2401                         btrfs_info(fs_info, "unable to update quota limit for %llu",
2402                                dstgroup->qgroupid);
2403                         goto unlock;
2404                 }
2405         }
2406
2407         if (srcid) {
2408                 srcgroup = find_qgroup_rb(fs_info, srcid);
2409                 if (!srcgroup)
2410                         goto unlock;
2411
2412                 /*
2413                  * We call inherit after we clone the root in order to make sure
2414                  * our counts don't go crazy, so at this point the only
2415                  * difference between the two roots should be the root node.
2416                  */
2417                 dstgroup->rfer = srcgroup->rfer;
2418                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2419                 dstgroup->excl = level_size;
2420                 dstgroup->excl_cmpr = level_size;
2421                 srcgroup->excl = level_size;
2422                 srcgroup->excl_cmpr = level_size;
2423
2424                 /* inherit the limit info */
2425                 dstgroup->lim_flags = srcgroup->lim_flags;
2426                 dstgroup->max_rfer = srcgroup->max_rfer;
2427                 dstgroup->max_excl = srcgroup->max_excl;
2428                 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2429                 dstgroup->rsv_excl = srcgroup->rsv_excl;
2430
2431                 qgroup_dirty(fs_info, dstgroup);
2432                 qgroup_dirty(fs_info, srcgroup);
2433         }
2434
2435         if (!inherit)
2436                 goto unlock;
2437
2438         i_qgroups = (u64 *)(inherit + 1);
2439         for (i = 0; i < inherit->num_qgroups; ++i) {
2440                 ret = add_relation_rb(quota_root->fs_info, objectid,
2441                                       *i_qgroups);
2442                 if (ret)
2443                         goto unlock;
2444                 ++i_qgroups;
2445         }
2446
2447         for (i = 0; i < inherit->num_ref_copies; ++i) {
2448                 struct btrfs_qgroup *src;
2449                 struct btrfs_qgroup *dst;
2450
2451                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2452                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2453
2454                 if (!src || !dst) {
2455                         ret = -EINVAL;
2456                         goto unlock;
2457                 }
2458
2459                 dst->rfer = src->rfer - level_size;
2460                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2461                 i_qgroups += 2;
2462         }
2463         for (i = 0; i < inherit->num_excl_copies; ++i) {
2464                 struct btrfs_qgroup *src;
2465                 struct btrfs_qgroup *dst;
2466
2467                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2468                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2469
2470                 if (!src || !dst) {
2471                         ret = -EINVAL;
2472                         goto unlock;
2473                 }
2474
2475                 dst->excl = src->excl + level_size;
2476                 dst->excl_cmpr = src->excl_cmpr + level_size;
2477                 i_qgroups += 2;
2478         }
2479
2480 unlock:
2481         spin_unlock(&fs_info->qgroup_lock);
2482 out:
2483         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2484         return ret;
2485 }
2486
2487 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2488 {
2489         struct btrfs_root *quota_root;
2490         struct btrfs_qgroup *qgroup;
2491         struct btrfs_fs_info *fs_info = root->fs_info;
2492         u64 ref_root = root->root_key.objectid;
2493         int ret = 0;
2494         struct ulist_node *unode;
2495         struct ulist_iterator uiter;
2496
2497         if (!is_fstree(ref_root))
2498                 return 0;
2499
2500         if (num_bytes == 0)
2501                 return 0;
2502
2503         spin_lock(&fs_info->qgroup_lock);
2504         quota_root = fs_info->quota_root;
2505         if (!quota_root)
2506                 goto out;
2507
2508         qgroup = find_qgroup_rb(fs_info, ref_root);
2509         if (!qgroup)
2510                 goto out;
2511
2512         /*
2513          * in a first step, we check all affected qgroups to see whether any
2514          * limits would be exceeded
2515          */
2516         ulist_reinit(fs_info->qgroup_ulist);
2517         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2518                         (uintptr_t)qgroup, GFP_ATOMIC);
2519         if (ret < 0)
2520                 goto out;
2521         ULIST_ITER_INIT(&uiter);
2522         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2523                 struct btrfs_qgroup *qg;
2524                 struct btrfs_qgroup_list *glist;
2525
2526                 qg = u64_to_ptr(unode->aux);
2527
2528                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2529                     qg->reserved + (s64)qg->rfer + num_bytes >
2530                     qg->max_rfer) {
2531                         ret = -EDQUOT;
2532                         goto out;
2533                 }
2534
2535                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2536                     qg->reserved + (s64)qg->excl + num_bytes >
2537                     qg->max_excl) {
2538                         ret = -EDQUOT;
2539                         goto out;
2540                 }
2541
2542                 list_for_each_entry(glist, &qg->groups, next_group) {
2543                         ret = ulist_add(fs_info->qgroup_ulist,
2544                                         glist->group->qgroupid,
2545                                         (uintptr_t)glist->group, GFP_ATOMIC);
2546                         if (ret < 0)
2547                                 goto out;
2548                 }
2549         }
2550         ret = 0;
2551         /*
2552          * no limits exceeded, now record the reservation into all qgroups
2553          */
2554         ULIST_ITER_INIT(&uiter);
2555         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2556                 struct btrfs_qgroup *qg;
2557
2558                 qg = u64_to_ptr(unode->aux);
2559
2560                 qg->reserved += num_bytes;
2561         }
2562
2563 out:
2564         spin_unlock(&fs_info->qgroup_lock);
2565         return ret;
2566 }
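
/*
 * Pairing sketch (illustrative; the real callers sit in the reservation
 * paths elsewhere): a successful btrfs_qgroup_reserve() is expected to be
 * balanced by btrfs_qgroup_free() once the space is committed or abandoned,
 * e.g.:
 *
 *	ret = btrfs_qgroup_reserve(root, num_bytes);
 *	if (ret)
 *		return ret;
 *	ret = do_allocation(root, num_bytes);
 *	if (ret)
 *		btrfs_qgroup_free(root, num_bytes);
 *
 * reserve returns -EDQUOT when a limit would be exceeded; do_allocation is
 * a hypothetical stand-in for whatever consumes the reserved space.
 */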
2567
2568 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
2569 {
2570         struct btrfs_root *quota_root;
2571         struct btrfs_qgroup *qgroup;
2572         struct btrfs_fs_info *fs_info = root->fs_info;
2573         struct ulist_node *unode;
2574         struct ulist_iterator uiter;
2575         u64 ref_root = root->root_key.objectid;
2576         int ret = 0;
2577
2578         if (!is_fstree(ref_root))
2579                 return;
2580
2581         if (num_bytes == 0)
2582                 return;
2583
2584         spin_lock(&fs_info->qgroup_lock);
2585
2586         quota_root = fs_info->quota_root;
2587         if (!quota_root)
2588                 goto out;
2589
2590         qgroup = find_qgroup_rb(fs_info, ref_root);
2591         if (!qgroup)
2592                 goto out;
2593
2594         ulist_reinit(fs_info->qgroup_ulist);
2595         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2596                         (uintptr_t)qgroup, GFP_ATOMIC);
2597         if (ret < 0)
2598                 goto out;
2599         ULIST_ITER_INIT(&uiter);
2600         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2601                 struct btrfs_qgroup *qg;
2602                 struct btrfs_qgroup_list *glist;
2603
2604                 qg = u64_to_ptr(unode->aux);
2605
2606                 qg->reserved -= num_bytes;
2607
2608                 list_for_each_entry(glist, &qg->groups, next_group) {
2609                         ret = ulist_add(fs_info->qgroup_ulist,
2610                                         glist->group->qgroupid,
2611                                         (uintptr_t)glist->group, GFP_ATOMIC);
2612                         if (ret < 0)
2613                                 goto out;
2614                 }
2615         }
2616
2617 out:
2618         spin_unlock(&fs_info->qgroup_lock);
2619 }
2620
2621 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2622 {
2623         if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
2624                 return;
2625         btrfs_err(trans->root->fs_info,
2626                 "qgroups not uptodate in trans handle %p:  list is%s empty, "
2627                 "seq is %#x.%x",
2628                 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
2629                 (u32)(trans->delayed_ref_elem.seq >> 32),
2630                 (u32)trans->delayed_ref_elem.seq);
2631         BUG();
2632 }
2633
2634 /*
2635  * returns < 0 on error, 0 when more leaves are to be scanned.
2636  * returns 1 when done.
2637  */
2638 static int
2639 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2640                    struct btrfs_trans_handle *trans, struct ulist *qgroups,
2641                    struct ulist *tmp, struct extent_buffer *scratch_leaf)
2642 {
2643         struct btrfs_key found;
2644         struct ulist *roots = NULL;
2645         struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2646         u64 num_bytes;
2647         u64 seq;
2648         int new_roots;
2649         int slot;
2650         int ret;
2651
2652         path->leave_spinning = 1;
2653         mutex_lock(&fs_info->qgroup_rescan_lock);
2654         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2655                                          &fs_info->qgroup_rescan_progress,
2656                                          path, 1, 0);
2657
2658         pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
2659                  fs_info->qgroup_rescan_progress.objectid,
2660                  fs_info->qgroup_rescan_progress.type,
2661                  fs_info->qgroup_rescan_progress.offset, ret);
2662
2663         if (ret) {
2664                 /*
2665                  * The rescan is about to end, we will not be scanning any
2666                  * further blocks. We cannot unset the RESCAN flag here, because
2667                  * we want to commit the transaction if everything went well.
2668                  * To make the live accounting work in this phase, we set our
2669                  * scan progress pointer such that every real extent objectid
2670                  * will be smaller.
2671                  */
2672                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2673                 btrfs_release_path(path);
2674                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2675                 return ret;
2676         }
2677
2678         btrfs_item_key_to_cpu(path->nodes[0], &found,
2679                               btrfs_header_nritems(path->nodes[0]) - 1);
2680         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2681
2682         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2683         memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
2684         slot = path->slots[0];
2685         btrfs_release_path(path);
2686         mutex_unlock(&fs_info->qgroup_rescan_lock);
2687
2688         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2689                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2690                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2691                     found.type != BTRFS_METADATA_ITEM_KEY)
2692                         continue;
2693                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2694                         num_bytes = fs_info->extent_root->nodesize;
2695                 else
2696                         num_bytes = found.offset;
2697
2698                 ulist_reinit(qgroups);
2699                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2700                                            &roots);
2701                 if (ret < 0)
2702                         goto out;
2703                 spin_lock(&fs_info->qgroup_lock);
2704                 seq = fs_info->qgroup_seq;
2705                 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
2706
2707                 new_roots = 0;
2708                 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2709                                              seq, &new_roots, 1);
2710                 if (ret < 0) {
2711                         spin_unlock(&fs_info->qgroup_lock);
2712                         ulist_free(roots);
2713                         goto out;
2714                 }
2715
2716                 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
2717                                              seq, 0, new_roots, 1);
2718                 if (ret < 0) {
2719                         spin_unlock(&fs_info->qgroup_lock);
2720                         ulist_free(roots);
2721                         goto out;
2722                 }
2723                 spin_unlock(&fs_info->qgroup_lock);
2724                 ulist_free(roots);
2725         }
2726 out:
2727         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2728
2729         return ret;
2730 }
2731
2732 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2733 {
2734         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2735                                                      qgroup_rescan_work);
2736         struct btrfs_path *path;
2737         struct btrfs_trans_handle *trans = NULL;
2738         struct ulist *tmp = NULL, *qgroups = NULL;
2739         struct extent_buffer *scratch_leaf = NULL;
2740         int err = -ENOMEM;
2741         int ret = 0;
2742
2743         path = btrfs_alloc_path();
2744         if (!path)
2745                 goto out;
2746         qgroups = ulist_alloc(GFP_NOFS);
2747         if (!qgroups)
2748                 goto out;
2749         tmp = ulist_alloc(GFP_NOFS);
2750         if (!tmp)
2751                 goto out;
2752         scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2753         if (!scratch_leaf)
2754                 goto out;
2755
2756         err = 0;
2757         while (!err) {
2758                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2759                 if (IS_ERR(trans)) {
2760                         err = PTR_ERR(trans);
2761                         break;
2762                 }
2763                 if (!fs_info->quota_enabled) {
2764                         err = -EINTR;
2765                 } else {
2766                         err = qgroup_rescan_leaf(fs_info, path, trans,
2767                                                  qgroups, tmp, scratch_leaf);
2768                 }
2769                 if (err > 0)
2770                         btrfs_commit_transaction(trans, fs_info->fs_root);
2771                 else
2772                         btrfs_end_transaction(trans, fs_info->fs_root);
2773         }
2774
2775 out:
2776         kfree(scratch_leaf);
2777         ulist_free(qgroups);
2778         ulist_free(tmp);
2779         btrfs_free_path(path);
2780
2781         mutex_lock(&fs_info->qgroup_rescan_lock);
2782         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2783
2784         if (err > 0 &&
2785             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2786                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2787         } else if (err < 0) {
2788                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2789         }
2790         mutex_unlock(&fs_info->qgroup_rescan_lock);
2791
2792         /*
2793          * only update status, since the previous part has already updated the
2794          * qgroup info.
2795          */
2796         trans = btrfs_start_transaction(fs_info->quota_root, 1);
2797         if (IS_ERR(trans)) {
2798                 err = PTR_ERR(trans);
2799                 btrfs_err(fs_info,
2800                           "fail to start transaction for status update: %d\n",
2801                           err);
2802                 goto done;
2803         }
2804         ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2805         if (ret < 0) {
2806                 err = ret;
2807                 btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
2808         }
2809         btrfs_end_transaction(trans, fs_info->quota_root);
2810
2811         if (err >= 0) {
2812                 btrfs_info(fs_info, "qgroup scan completed%s",
2813                         err > 0 ? " (inconsistency flag cleared)" : "");
2814         } else {
2815                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2816         }
2817
2818 done:
2819         complete_all(&fs_info->qgroup_rescan_completion);
2820 }
2821
2822 /*
2823  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2824  * memory required for the rescan context.
2825  */
2826 static int
2827 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2828                    int init_flags)
2829 {
2830         int ret = 0;
2831
2832         if (!init_flags &&
2833             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2834              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2835                 ret = -EINVAL;
2836                 goto err;
2837         }
2838
2839         mutex_lock(&fs_info->qgroup_rescan_lock);
2840         spin_lock(&fs_info->qgroup_lock);
2841
2842         if (init_flags) {
2843                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2844                         ret = -EINPROGRESS;
2845                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2846                         ret = -EINVAL;
2847
2848                 if (ret) {
2849                         spin_unlock(&fs_info->qgroup_lock);
2850                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2851                         goto err;
2852                 }
2853                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2854         }
2855
2856         memset(&fs_info->qgroup_rescan_progress, 0,
2857                 sizeof(fs_info->qgroup_rescan_progress));
2858         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2859
2860         spin_unlock(&fs_info->qgroup_lock);
2861         mutex_unlock(&fs_info->qgroup_rescan_lock);
2862
2863         init_completion(&fs_info->qgroup_rescan_completion);
2864
2865         memset(&fs_info->qgroup_rescan_work, 0,
2866                sizeof(fs_info->qgroup_rescan_work));
2867         btrfs_init_work(&fs_info->qgroup_rescan_work,
2868                         btrfs_qgroup_rescan_helper,
2869                         btrfs_qgroup_rescan_worker, NULL, NULL);
2870
2871         if (ret) {
2872 err:
2873                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2874                 return ret;
2875         }
2876
2877         return 0;
2878 }
2879
2880 static void
2881 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2882 {
2883         struct rb_node *n;
2884         struct btrfs_qgroup *qgroup;
2885
2886         spin_lock(&fs_info->qgroup_lock);
2887         /* clear all current qgroup tracking information */
2888         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2889                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2890                 qgroup->rfer = 0;
2891                 qgroup->rfer_cmpr = 0;
2892                 qgroup->excl = 0;
2893                 qgroup->excl_cmpr = 0;
2894         }
2895         spin_unlock(&fs_info->qgroup_lock);
2896 }
2897
2898 int
2899 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2900 {
2901         int ret = 0;
2902         struct btrfs_trans_handle *trans;
2903
2904         ret = qgroup_rescan_init(fs_info, 0, 1);
2905         if (ret)
2906                 return ret;
2907
2908         /*
2909          * We have set the rescan_progress to 0, which means no more
2910          * delayed refs will be accounted by btrfs_qgroup_account.
2911          * However, btrfs_qgroup_account may be right after its call
2912          * to btrfs_find_all_roots, in which case it would still do the
2913          * accounting.
2914          * To solve this, we're committing the transaction, which will
2915          * ensure we run all delayed refs and only after that, we are
2916          * going to clear all tracking information for a clean start.
2917          */
2918
2919         trans = btrfs_join_transaction(fs_info->fs_root);
2920         if (IS_ERR(trans)) {
2921                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2922                 return PTR_ERR(trans);
2923         }
2924         ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2925         if (ret) {
2926                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2927                 return ret;
2928         }
2929
2930         qgroup_rescan_zero_tracking(fs_info);
2931
2932         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2933                          &fs_info->qgroup_rescan_work);
2934
2935         return 0;
2936 }
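
/*
 * Trigger sketch (an assumption about the plumbing, which lives in the
 * ioctl code rather than here): userspace starts a rescan via the quota
 * rescan ioctl, which ends up calling btrfs_qgroup_rescan(fs_info) and may
 * then block in btrfs_qgroup_wait_for_completion() below until the worker
 * finishes.
 */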
2937
2938 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2939 {
2940         int running;
2941         int ret = 0;
2942
2943         mutex_lock(&fs_info->qgroup_rescan_lock);
2944         spin_lock(&fs_info->qgroup_lock);
2945         running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2946         spin_unlock(&fs_info->qgroup_lock);
2947         mutex_unlock(&fs_info->qgroup_rescan_lock);
2948
2949         if (running)
2950                 ret = wait_for_completion_interruptible(
2951                                         &fs_info->qgroup_rescan_completion);
2952
2953         return ret;
2954 }
2955
2956 /*
2957  * this is only called from open_ctree where we're still single threaded, thus
2958  * locking is omitted here.
2959  */
2960 void
2961 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2962 {
2963         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2964                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
2965                                  &fs_info->qgroup_rescan_work);
2966 }