These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / fs / f2fs / super.c
index b2dd1b0..3a65e01 100644 (file)
@@ -39,6 +39,13 @@ static struct proc_dir_entry *f2fs_proc_root;
 static struct kmem_cache *f2fs_inode_cachep;
 static struct kset *f2fs_kset;
 
+/* f2fs-wide shrinker description; (un)registered at module init/exit */
+static struct shrinker f2fs_shrinker_info = {
+       .scan_objects = f2fs_shrink_scan,       /* asked to free objects */
+       .count_objects = f2fs_shrink_count,     /* reports freeable objects */
+       .seeks = DEFAULT_SEEKS,
+};
+
 enum {
        Opt_gc_background,
        Opt_disable_roll_forward,
@@ -58,6 +65,7 @@ enum {
        Opt_nobarrier,
        Opt_fastboot,
        Opt_extent_cache,
+       Opt_noextent_cache,
        Opt_noinline_data,
        Opt_err,
 };
@@ -81,6 +89,7 @@ static match_table_t f2fs_tokens = {
        {Opt_nobarrier, "nobarrier"},
        {Opt_fastboot, "fastboot"},
        {Opt_extent_cache, "extent_cache"},
+       {Opt_noextent_cache, "noextent_cache"},
        {Opt_noinline_data, "noinline_data"},
        {Opt_err, NULL},
 };
@@ -204,8 +213,10 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -222,6 +233,8 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(max_victim_search),
        ATTR_LIST(dir_level),
        ATTR_LIST(ram_thresh),
+       ATTR_LIST(ra_nid_pages),
+       ATTR_LIST(cp_interval),
        NULL,
 };
 
@@ -258,6 +271,7 @@ static void init_once(void *foo)
 static int parse_options(struct super_block *sb, char *options)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
+       struct request_queue *q;
        substring_t args[MAX_OPT_ARGS];
        char *p, *name;
        int arg = 0;
@@ -282,11 +296,16 @@ static int parse_options(struct super_block *sb, char *options)
 
                        if (!name)
                                return -ENOMEM;
-                       if (strlen(name) == 2 && !strncmp(name, "on", 2))
+                       if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
                                set_opt(sbi, BG_GC);
-                       else if (strlen(name) == 3 && !strncmp(name, "off", 3))
+                               clear_opt(sbi, FORCE_FG_GC);
+                       } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
                                clear_opt(sbi, BG_GC);
-                       else {
+                               clear_opt(sbi, FORCE_FG_GC);
+                       } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
+                               set_opt(sbi, BG_GC);
+                               set_opt(sbi, FORCE_FG_GC);
+                       } else {
                                kfree(name);
                                return -EINVAL;
                        }
@@ -302,7 +321,14 @@ static int parse_options(struct super_block *sb, char *options)
                                return -EINVAL;
                        break;
                case Opt_discard:
-                       set_opt(sbi, DISCARD);
+                       q = bdev_get_queue(sb->s_bdev);
+                       if (blk_queue_discard(q)) {
+                               set_opt(sbi, DISCARD);
+                       } else {
+                               f2fs_msg(sb, KERN_WARNING,
+                                       "mounting with \"discard\" option, but "
+                                       "the device does not support discard");
+                       }
                        break;
                case Opt_noheap:
                        set_opt(sbi, NOHEAP);
@@ -374,6 +400,9 @@ static int parse_options(struct super_block *sb, char *options)
                case Opt_extent_cache:
                        set_opt(sbi, EXTENT_CACHE);
                        break;
+               case Opt_noextent_cache:
+                       clear_opt(sbi, EXTENT_CACHE);
+                       break;
                case Opt_noinline_data:
                        clear_opt(sbi, INLINE_DATA);
                        break;
@@ -402,9 +431,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
        atomic_set(&fi->dirty_pages, 0);
        fi->i_current_depth = 1;
        fi->i_advise = 0;
-       rwlock_init(&fi->ext_lock);
        init_rwsem(&fi->i_sem);
-       INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
        INIT_LIST_HEAD(&fi->inmem_pages);
        mutex_init(&fi->inmem_lock);
 
@@ -416,6 +443,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
        /* Will be used by directory only */
        fi->i_dir_level = F2FS_SB(sb)->dir_level;
 
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+       fi->i_crypt_info = NULL;
+#endif
        return &fi->vfs_inode;
 }
 
@@ -428,8 +458,37 @@ static int f2fs_drop_inode(struct inode *inode)
         *    - f2fs_gc -> iput -> evict
         *       - inode_wait_for_writeback(inode)
         */
-       if (!inode_unhashed(inode) && inode->i_state & I_SYNC)
+       if (!inode_unhashed(inode) && inode->i_state & I_SYNC) {
+               if (!inode->i_nlink && !is_bad_inode(inode)) {
+                       /* to avoid evict_inode call simultaneously */
+                       atomic_inc(&inode->i_count);
+                       spin_unlock(&inode->i_lock);
+
+                       /* some remaining atomic pages should be discarded */
+                       if (f2fs_is_atomic_file(inode))
+                               commit_inmem_pages(inode, true);
+
+                       /* fi->extent_tree should remain for writepage */
+                       f2fs_destroy_extent_node(inode);
+
+                       sb_start_intwrite(inode->i_sb);
+                       i_size_write(inode, 0);
+
+                       if (F2FS_HAS_BLOCKS(inode))
+                               f2fs_truncate(inode, true);
+
+                       sb_end_intwrite(inode->i_sb);
+
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+                       if (F2FS_I(inode)->i_crypt_info)
+                               f2fs_free_encryption_info(inode,
+                                       F2FS_I(inode)->i_crypt_info);
+#endif
+                       spin_lock(&inode->i_lock);
+                       atomic_dec(&inode->i_count);
+               }
                return 0;
+       }
        return generic_drop_inode(inode);
 }
 
@@ -464,9 +523,11 @@ static void f2fs_put_super(struct super_block *sb)
        }
        kobject_del(&sbi->s_kobj);
 
-       f2fs_destroy_stats(sbi);
        stop_gc_thread(sbi);
 
+       /* prevent remaining shrinker jobs */
+       mutex_lock(&sbi->umount_mutex);
+
        /*
         * We don't need to do checkpoint when superblock is clean.
         * But, the previous checkpoint was not done by umount, it needs to do
@@ -480,6 +541,9 @@ static void f2fs_put_super(struct super_block *sb)
                write_checkpoint(sbi, &cpc);
        }
 
+       /* write_checkpoint can update stat information */
+       f2fs_destroy_stats(sbi);
+
        /*
         * normally superblock is clean, so we need to release this.
         * In addition, EIO will skip do checkpoint, we need this as well.
@@ -487,6 +551,9 @@ static void f2fs_put_super(struct super_block *sb)
        release_dirty_inode(sbi);
        release_discard_addrs(sbi);
 
+       f2fs_leave_shrinker(sbi);
+       mutex_unlock(&sbi->umount_mutex);
+
        iput(sbi->node_inode);
        iput(sbi->meta_inode);
 
@@ -520,7 +587,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
        } else {
                f2fs_balance_fs(sbi);
        }
-       f2fs_trace_ios(NULL, NULL, 1);
+       f2fs_trace_ios(NULL, 1);
 
        return 0;
 }
@@ -573,10 +640,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
 
-       if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
-               seq_printf(seq, ",background_gc=%s", "on");
-       else
+       if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
+               if (test_opt(sbi, FORCE_FG_GC))
+                       seq_printf(seq, ",background_gc=%s", "sync");
+               else
+                       seq_printf(seq, ",background_gc=%s", "on");
+       } else {
                seq_printf(seq, ",background_gc=%s", "off");
+       }
        if (test_opt(sbi, DISABLE_ROLL_FORWARD))
                seq_puts(seq, ",disable_roll_forward");
        if (test_opt(sbi, DISCARD))
@@ -613,6 +684,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
                seq_puts(seq, ",fastboot");
        if (test_opt(sbi, EXTENT_CACHE))
                seq_puts(seq, ",extent_cache");
+       else
+               seq_puts(seq, ",noextent_cache");
        seq_printf(seq, ",active_logs=%u", sbi->active_logs);
 
        return 0;
@@ -633,7 +706,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
                struct seg_entry *se = get_seg_entry(sbi, i);
 
                if ((i % 10) == 0)
-                       seq_printf(seq, "%-5d", i);
+                       seq_printf(seq, "%-10d", i);
                seq_printf(seq, "%d|%-3u", se->type,
                                        get_valid_blocks(sbi, i, 1));
                if ((i % 10) == 9 || i == (total_segs - 1))
@@ -658,6 +731,23 @@ static const struct file_operations f2fs_seq_segment_info_fops = {
        .release = single_release,
 };
 
+static void default_options(struct f2fs_sb_info *sbi)
+{
+       /* FS parameter defaults shared by f2fs_fill_super() and f2fs_remount() */
+       sbi->active_logs = NR_CURSEG_TYPE;
+
+       set_opt(sbi, BG_GC);
+       set_opt(sbi, INLINE_DATA);
+       set_opt(sbi, EXTENT_CACHE);     /* on unless "noextent_cache" is given */
+
+#ifdef CONFIG_F2FS_FS_XATTR
+       set_opt(sbi, XATTR_USER);
+#endif
+#ifdef CONFIG_F2FS_FS_POSIX_ACL
+       set_opt(sbi, POSIX_ACL);
+#endif
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -665,6 +755,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        int err, active_logs;
        bool need_restart_gc = false;
        bool need_stop_gc = false;
+       bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
 
        sync_filesystem(sb);
 
@@ -676,7 +767,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        active_logs = sbi->active_logs;
 
        sbi->mount_opt.opt = 0;
-       sbi->active_logs = NR_CURSEG_TYPE;
+       default_options(sbi);
 
        /* parse mount options */
        err = parse_options(sb, data);
@@ -690,6 +781,14 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
                goto skip;
 
+       /* disallow enable/disable extent_cache dynamically */
+       if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
+               err = -EINVAL;
+               f2fs_msg(sbi->sb, KERN_WARNING,
+                               "switch extent_cache option is not allowed");
+               goto restore_opts;
+       }
+
        /*
         * We stop the GC thread if FS is mounted as RO
         * or if background_gc = off is passed in mount
@@ -919,7 +1018,11 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
                atomic_set(&sbi->nr_pages[i], 0);
 
        sbi->dir_level = DEF_DIR_LEVEL;
+       sbi->cp_interval = DEF_CP_INTERVAL;
        clear_sbi_flag(sbi, SBI_NEED_FSCK);
+
+       INIT_LIST_HEAD(&sbi->s_list);
+       mutex_init(&sbi->umount_mutex);
 }
 
 /*
@@ -929,29 +1032,36 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
  */
 static int read_raw_super_block(struct super_block *sb,
                        struct f2fs_super_block **raw_super,
-                       struct buffer_head **raw_super_buf)
+                       struct buffer_head **raw_super_buf,
+                       int *recovery)
 {
        int block = 0;
+       struct buffer_head *buffer;
+       struct f2fs_super_block *super;
+       int err = 0;
 
 retry:
-       *raw_super_buf = sb_bread(sb, block);
-       if (!*raw_super_buf) {
+       buffer = sb_bread(sb, block);
+       if (!buffer) {
+               *recovery = 1;
                f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
                                block + 1);
                if (block == 0) {
                        block++;
                        goto retry;
                } else {
-                       return -EIO;
+                       err = -EIO;
+                       goto out;
                }
        }
 
-       *raw_super = (struct f2fs_super_block *)
-               ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
+       super = (struct f2fs_super_block *)
+               ((char *)(buffer)->b_data + F2FS_SUPER_OFFSET);
 
        /* sanity checking of raw super */
-       if (sanity_check_raw_super(sb, *raw_super)) {
-               brelse(*raw_super_buf);
+       if (sanity_check_raw_super(sb, super)) {
+               brelse(buffer);
+               *recovery = 1;
                f2fs_msg(sb, KERN_ERR,
                        "Can't find valid F2FS filesystem in %dth superblock",
                                                                block + 1);
@@ -959,25 +1069,76 @@ retry:
                        block++;
                        goto retry;
                } else {
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out;
                }
        }
 
+       if (!*raw_super) {
+               *raw_super_buf = buffer;
+               *raw_super = super;
+       } else {
+               /* already have a valid superblock */
+               brelse(buffer);
+       }
+
+       /* check the validity of the second superblock */
+       if (block == 0) {
+               block++;
+               goto retry;
+       }
+
+out:
+       /* No valid superblock */
+       if (!*raw_super)
+               return err;
+
        return 0;
 }
 
+int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
+{
+       struct buffer_head *sbh = sbi->raw_super_buf;
+       sector_t block = sbh->b_blocknr;        /* where the buffer really lives */
+       int err;
+
+       /* write back-up superblock first: same buffer, aimed at the other copy */
+       sbh->b_blocknr = block ? 0 : 1; /* block 1 if we hold 0, else block 0 */
+       mark_buffer_dirty(sbh);
+       err = sync_dirty_buffer(sbh);
+
+       sbh->b_blocknr = block;         /* point the buffer back at its own block */
+
+       /* in recovery path, or if that write failed, skip the valid superblock */
+       if (recover || err)
+               goto out;
+
+       /* write current valid superblock */
+       mark_buffer_dirty(sbh);
+       err = sync_dirty_buffer(sbh);
+out:    /* buffer flags below are reset on success and failure alike */
+       clear_buffer_write_io_error(sbh);
+       set_buffer_uptodate(sbh);
+       return err;     /* result of the last sync_dirty_buffer() call */
+}
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct f2fs_sb_info *sbi;
-       struct f2fs_super_block *raw_super = NULL;
+       struct f2fs_super_block *raw_super;
        struct buffer_head *raw_super_buf;
        struct inode *root;
-       long err = -EINVAL;
+       long err;
        bool retry = true, need_fsck = false;
        char *options = NULL;
-       int i;
+       int recovery, i;
 
 try_onemore:
+       err = -EINVAL;
+       raw_super = NULL;
+       raw_super_buf = NULL;
+       recovery = 0;
+
        /* allocate memory for f2fs-specific super block info */
        sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
        if (!sbi)
@@ -989,23 +1150,12 @@ try_onemore:
                goto free_sbi;
        }
 
-       err = read_raw_super_block(sb, &raw_super, &raw_super_buf);
+       err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery);
        if (err)
                goto free_sbi;
 
        sb->s_fs_info = sbi;
-       /* init some FS parameters */
-       sbi->active_logs = NR_CURSEG_TYPE;
-
-       set_opt(sbi, BG_GC);
-       set_opt(sbi, INLINE_DATA);
-
-#ifdef CONFIG_F2FS_FS_XATTR
-       set_opt(sbi, XATTR_USER);
-#endif
-#ifdef CONFIG_F2FS_FS_POSIX_ACL
-       set_opt(sbi, POSIX_ACL);
-#endif
+       default_options(sbi);
        /* parse mount options */
        options = kstrdup((const char *)data, GFP_KERNEL);
        if (data && !options) {
@@ -1038,7 +1188,9 @@ try_onemore:
        mutex_init(&sbi->writepages);
        mutex_init(&sbi->cp_mutex);
        init_rwsem(&sbi->node_write);
-       clear_sbi_flag(sbi, SBI_POR_DOING);
+
+       /* disallow all the data/node/meta page writes */
+       set_sbi_flag(sbi, SBI_POR_DOING);
        spin_lock_init(&sbi->stat_lock);
 
        init_rwsem(&sbi->read_io.io_rwsem);
@@ -1115,8 +1267,12 @@ try_onemore:
                goto free_nm;
        }
 
+       f2fs_join_shrinker(sbi);
+
        /* if there are nt orphan nodes free them */
-       recover_orphan_inodes(sbi);
+       err = recover_orphan_inodes(sbi);
+       if (err)
+               goto free_node_inode;
 
        /* read root inode and dentry */
        root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
@@ -1148,14 +1304,6 @@ try_onemore:
                proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
                                 &f2fs_seq_segment_info_fops, sb);
 
-       if (test_opt(sbi, DISCARD)) {
-               struct request_queue *q = bdev_get_queue(sb->s_bdev);
-               if (!blk_queue_discard(q))
-                       f2fs_msg(sb, KERN_WARNING,
-                                       "mounting with \"discard\" option, but "
-                                       "the device does not support discard");
-       }
-
        sbi->s_kobj.kset = f2fs_kset;
        init_completion(&sbi->s_kobj_unregister);
        err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
@@ -1186,6 +1334,8 @@ try_onemore:
                        goto free_kobj;
                }
        }
+       /* recover_fsync_data() cleared this already */
+       clear_sbi_flag(sbi, SBI_POR_DOING);
 
        /*
         * If filesystem is not mounted as read-only then
@@ -1198,6 +1348,15 @@ try_onemore:
                        goto free_kobj;
        }
        kfree(options);
+
+       /* recover broken superblock */
+       if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) {
+               f2fs_msg(sb, KERN_INFO, "Recover invalid superblock");
+               f2fs_commit_super(sbi, true);
+       }
+
+       sbi->cp_expires = round_jiffies_up(jiffies);
+
        return 0;
 
 free_kobj:
@@ -1212,7 +1371,10 @@ free_root_inode:
        dput(sb->s_root);
        sb->s_root = NULL;
 free_node_inode:
+       mutex_lock(&sbi->umount_mutex);
+       f2fs_leave_shrinker(sbi);
        iput(sbi->node_inode);
+       mutex_unlock(&sbi->umount_mutex);
 free_nm:
        destroy_node_manager(sbi);
 free_sm:
@@ -1305,13 +1467,25 @@ static int __init init_f2fs_fs(void)
                err = -ENOMEM;
                goto free_extent_cache;
        }
-       err = register_filesystem(&f2fs_fs_type);
+       err = f2fs_init_crypto();
        if (err)
                goto free_kset;
+
+       err = register_shrinker(&f2fs_shrinker_info);
+       if (err)
+               goto free_crypto;
+
+       err = register_filesystem(&f2fs_fs_type);
+       if (err)
+               goto free_shrinker;
        f2fs_create_root_stats();
        f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
        return 0;
 
+free_shrinker:
+       unregister_shrinker(&f2fs_shrinker_info);
+free_crypto:
+       f2fs_exit_crypto();
 free_kset:
        kset_unregister(f2fs_kset);
 free_extent_cache:
@@ -1332,7 +1506,9 @@ static void __exit exit_f2fs_fs(void)
 {
        remove_proc_entry("fs/f2fs", NULL);
        f2fs_destroy_root_stats();
+       unregister_shrinker(&f2fs_shrinker_info);
        unregister_filesystem(&f2fs_fs_type);
+       f2fs_exit_crypto();
        destroy_extent_cache();
        destroy_checkpoint_caches();
        destroy_segment_manager_caches();