These changes are the raw update to linux-4.4.6-rt14. Kernel sources

[kvmfornfv.git] / kernel / fs / f2fs / checkpoint.c
diff --git a/kernel/fs/f2fs/checkpoint.c b/kernel/fs/f2fs/checkpoint.c

index a5e17a2..f661d80 100644 (file)
--- a/kernel/fs/f2fs/checkpoint.c
+++ b/kernel/fs/f2fs/checkpoint.c
@@ -47,15 +47,21 @@ repeat:
  /*
   * We guarantee no failure on the returned page.
   */
-struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
+                                                       bool is_meta)
  {
         struct address_space *mapping = META_MAPPING(sbi);
         struct page *page;
         struct f2fs_io_info fio = {
+               .sbi = sbi,
                 .type = META,
                 .rw = READ_SYNC | REQ_META | REQ_PRIO,
                 .blk_addr = index,
+               .encrypted_page = NULL,
         };
+
+       if (unlikely(!is_meta))
+               fio.rw &= ~REQ_META;
  repeat:
         page = grab_cache_page(mapping, index);
         if (!page) {
@@ -65,20 +71,42 @@ repeat:
         if (PageUptodate(page))
                 goto out;
  
-       if (f2fs_submit_page_bio(sbi, page, &fio))
+       fio.page = page;
+
+       if (f2fs_submit_page_bio(&fio)) {
+               f2fs_put_page(page, 1);
                 goto repeat;
+       }
  
         lock_page(page);
         if (unlikely(page->mapping != mapping)) {
                 f2fs_put_page(page, 1);
                 goto repeat;
         }
+
+       /*
+        * If an I/O error occurred while accessing the device, make the
+        * filesystem read-only and make sure that no checkpoint is written
+        * with a non-uptodate meta page.
+        */
+       if (unlikely(!PageUptodate(page)))
+               f2fs_stop_checkpoint(sbi);
  out:
         return page;
  }
  
-static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi,
-                                               block_t blkaddr, int type)
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+       return __get_meta_page(sbi, index, true);
+}
+
+/* for POR only */
+struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+       return __get_meta_page(sbi, index, false);
+}
+
+bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
  {
         switch (type) {
         case META_NAT:
@@ -112,16 +140,22 @@ static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi,
  /*
   * Readahead CP/NAT/SIT/SSA pages
   */
-int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
+int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+                                                       int type, bool sync)
  {
         block_t prev_blk_addr = 0;
         struct page *page;
         block_t blkno = start;
         struct f2fs_io_info fio = {
+               .sbi = sbi,
                 .type = META,
-               .rw = READ_SYNC | REQ_META | REQ_PRIO
+               .rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
+               .encrypted_page = NULL,
         };
  
+       if (unlikely(type == META_POR))
+               fio.rw &= ~REQ_META;
+
         for (; nrpages-- > 0; blkno++) {
  
                 if (!is_valid_blkaddr(sbi, blkno, type))
@@ -161,7 +195,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
                         continue;
                 }
  
-               f2fs_submit_page_mbio(sbi, page, &fio);
+               fio.page = page;
+               f2fs_submit_page_mbio(&fio);
                 f2fs_put_page(page, 0);
         }
  out:
@@ -180,7 +215,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
         f2fs_put_page(page, 0);
  
         if (readahead)
-               ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+               ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
  }
  
  static int f2fs_write_meta_page(struct page *page,
@@ -241,7 +276,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                                                 long nr_to_write)
  {
         struct address_space *mapping = META_MAPPING(sbi);
-       pgoff_t index = 0, end = LONG_MAX;
+       pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX;
         struct pagevec pvec;
         long nwritten = 0;
         struct writeback_control wbc = {
@@ -261,6 +296,13 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                 for (i = 0; i < nr_pages; i++) {
                         struct page *page = pvec.pages[i];
  
+                       if (prev == LONG_MAX)
+                               prev = page->index - 1;
+                       if (nr_to_write != LONG_MAX && page->index != prev + 1) {
+                               pagevec_release(&pvec);
+                               goto stop;
+                       }
+
                         lock_page(page);
  
                         if (unlikely(page->mapping != mapping)) {
@@ -281,13 +323,14 @@ continue_unlock:
                                 break;
                         }
                         nwritten++;
+                       prev = page->index;
                         if (unlikely(nwritten >= nr_to_write))
                                 break;
                 }
                 pagevec_release(&pvec);
                 cond_resched();
         }
-
+stop:
         if (nwritten)
                 f2fs_submit_merged_bio(sbi, type, WRITE);
  
@@ -320,26 +363,18 @@ const struct address_space_operations f2fs_meta_aops = {
  static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
  {
         struct inode_management *im = &sbi->im[type];
-       struct ino_entry *e;
+       struct ino_entry *e, *tmp;
+
+       tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
  retry:
-       if (radix_tree_preload(GFP_NOFS)) {
-               cond_resched();
-               goto retry;
-       }
+       radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
  
         spin_lock(&im->ino_lock);
-
         e = radix_tree_lookup(&im->ino_root, ino);
         if (!e) {
-               e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
-               if (!e) {
-                       spin_unlock(&im->ino_lock);
-                       radix_tree_preload_end();
-                       goto retry;
-               }
+               e = tmp;
                 if (radix_tree_insert(&im->ino_root, ino, e)) {
                         spin_unlock(&im->ino_lock);
-                       kmem_cache_free(ino_entry_slab, e);
                         radix_tree_preload_end();
                         goto retry;
                 }
@@ -352,6 +387,9 @@ retry:
         }
         spin_unlock(&im->ino_lock);
         radix_tree_preload_end();
+
+       if (e != tmp)
+               kmem_cache_free(ino_entry_slab, tmp);
  }
  
  static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -452,29 +490,39 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
         __remove_ino_entry(sbi, ino, ORPHAN_INO);
  }
  
-static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
  {
-       struct inode *inode = f2fs_iget(sbi->sb, ino);
-       f2fs_bug_on(sbi, IS_ERR(inode));
+       struct inode *inode;
+
+       inode = f2fs_iget(sbi->sb, ino);
+       if (IS_ERR(inode)) {
+               /*
+                * failing to find the entry of an orphan inode (-ENOENT)
+                * indicates a bug.
+                */
+               f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
+               return PTR_ERR(inode);
+       }
+
         clear_nlink(inode);
  
         /* truncate all the data during iput */
         iput(inode);
+       return 0;
  }
  
-void recover_orphan_inodes(struct f2fs_sb_info *sbi)
+int recover_orphan_inodes(struct f2fs_sb_info *sbi)
  {
         block_t start_blk, orphan_blocks, i, j;
+       int err;
  
         if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
-               return;
-
-       set_sbi_flag(sbi, SBI_POR_DOING);
+               return 0;
  
         start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
         orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
  
-       ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
+       ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
  
         for (i = 0; i < orphan_blocks; i++) {
                 struct page *page = get_meta_page(sbi, start_blk + i);
@@ -483,14 +531,17 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
                 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
                 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
                         nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
-                       recover_orphan_inode(sbi, ino);
+                       err = recover_orphan_inode(sbi, ino);
+                       if (err) {
+                               f2fs_put_page(page, 1);
+                               return err;
+                       }
                 }
                 f2fs_put_page(page, 1);
         }
         /* clear Orphan Flag */
         clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
-       clear_sbi_flag(sbi, SBI_POR_DOING);
-       return;
+       return 0;
  }
  
  static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
@@ -498,7 +549,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
         struct list_head *head;
         struct f2fs_orphan_block *orphan_blk = NULL;
         unsigned int nentries = 0;
-       unsigned short index;
+       unsigned short index = 1;
         unsigned short orphan_blocks;
         struct page *page = NULL;
         struct ino_entry *orphan = NULL;
@@ -506,22 +557,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
  
         orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
  
-       for (index = 0; index < orphan_blocks; index++)
-               grab_meta_page(sbi, start_blk + index);
-
-       index = 1;
-       spin_lock(&im->ino_lock);
+       /*
+        * We don't need to take spin_lock(&im->ino_lock) here, since all
+        * orphan inode operations are covered by f2fs_lock_op().
+        * Also, a spin_lock should be avoided due to the page operations below.
+        */
         head = &im->ino_list;
  
         /* loop for each orphan inode entry and write them in Jornal block */
         list_for_each_entry(orphan, head, list) {
                 if (!page) {
-                       page = find_get_page(META_MAPPING(sbi), start_blk++);
-                       f2fs_bug_on(sbi, !page);
+                       page = grab_meta_page(sbi, start_blk++);
                         orphan_blk =
                                 (struct f2fs_orphan_block *)page_address(page);
                         memset(orphan_blk, 0, sizeof(*orphan_blk));
-                       f2fs_put_page(page, 0);
                 }
  
                 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
@@ -550,8 +599,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
                 set_page_dirty(page);
                 f2fs_put_page(page, 1);
         }
-
-       spin_unlock(&im->ino_lock);
  }
  
  static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -695,7 +742,8 @@ void update_dirty_page(struct inode *inode, struct page *page)
         struct inode_entry *new;
         int ret = 0;
  
-       if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
+       if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
+                       !S_ISLNK(inode->i_mode))
                 return;
  
         if (!S_ISDIR(inode->i_mode)) {
@@ -879,18 +927,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
         nid_t last_nid = nm_i->next_scan_nid;
         block_t start_blk;
-       struct page *cp_page;
         unsigned int data_sum_blocks, orphan_blocks;
         __u32 crc32 = 0;
-       void *kaddr;
         int i;
         int cp_payload_blks = __cp_payload(sbi);
+       block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
+       bool invalidate = false;
  
         /*
          * This avoids to conduct wrong roll-forward operations and uses
          * metapages, so should be called prior to sync_meta_pages below.
          */
-       discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
+       if (discard_next_dnode(sbi, discard_blk))
+               invalidate = true;
  
         /* Flush all the NAT/SIT pages */
         while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -978,20 +1027,17 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
  
         start_blk = __start_cp_addr(sbi);
  
+       /* need to wait for end_io results */
+       wait_on_all_pages_writeback(sbi);
+       if (unlikely(f2fs_cp_error(sbi)))
+               return;
+
         /* write out checkpoint buffer at block 0 */
-       cp_page = grab_meta_page(sbi, start_blk++);
-       kaddr = page_address(cp_page);
-       memcpy(kaddr, ckpt, F2FS_BLKSIZE);
-       set_page_dirty(cp_page);
-       f2fs_put_page(cp_page, 1);
-
-       for (i = 1; i < 1 + cp_payload_blks; i++) {
-               cp_page = grab_meta_page(sbi, start_blk++);
-               kaddr = page_address(cp_page);
-               memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE);
-               set_page_dirty(cp_page);
-               f2fs_put_page(cp_page, 1);
-       }
+       update_meta_page(sbi, ckpt, start_blk++);
+
+       for (i = 1; i < 1 + cp_payload_blks; i++)
+               update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
+                                                       start_blk++);
  
         if (orphan_num) {
                 write_orphan_inodes(sbi, start_blk);
@@ -1006,11 +1052,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         }
  
         /* writeout checkpoint block */
-       cp_page = grab_meta_page(sbi, start_blk);
-       kaddr = page_address(cp_page);
-       memcpy(kaddr, ckpt, F2FS_BLKSIZE);
-       set_page_dirty(cp_page);
-       f2fs_put_page(cp_page, 1);
+       update_meta_page(sbi, ckpt, start_blk);
  
         /* wait for previous submitted node/meta pages writeback */
         wait_on_all_pages_writeback(sbi);
@@ -1031,12 +1073,20 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         /* wait for previous submitted meta pages writeback */
         wait_on_all_pages_writeback(sbi);
  
+       /*
+        * invalidate the meta page that was used temporarily for zeroing
+        * out the block at the end of the warm node chain.
+        */
+       if (invalidate)
+               invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
+                                                               discard_blk);
+
         release_dirty_inode(sbi);
  
         if (unlikely(f2fs_cp_error(sbi)))
                 return;
  
-       clear_prefree_segments(sbi);
+       clear_prefree_segments(sbi, cpc);
         clear_sbi_flag(sbi, SBI_IS_DIRTY);
  }
  
@@ -1051,7 +1101,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         mutex_lock(&sbi->cp_mutex);
  
         if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
-               (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC))
+               (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
+               (cpc->reason == CP_DISCARD && !sbi->discard_blks)))
                 goto out;
         if (unlikely(f2fs_cp_error(sbi)))
                 goto out;
@@ -1090,6 +1141,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         if (cpc->reason == CP_RECOVERY)
                 f2fs_msg(sbi->sb, KERN_NOTICE,
                         "checkpoint: version = %llx", ckpt_ver);
+
+       /* do checkpoint periodically */
+       sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
  out:
         mutex_unlock(&sbi->cp_mutex);
         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");