kernel/fs/logfs/dev_bdev.c

   1 /*
   2  * fs/logfs/dev_bdev.c  - Device access methods for block devices
   3  *
   4  * As should be obvious for Linux kernel code, license is GPLv2
   5  *
   6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
   7  */
   8 #include "logfs.h"
   9 #include <linux/bio.h>
  10 #include <linux/blkdev.h>
  11 #include <linux/buffer_head.h>
  12 #include <linux/gfp.h>
  13 #include <linux/prefetch.h>
  14
  15 #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
  16
  17 static int sync_request(struct page *page, struct block_device *bdev, int rw)
  18 {
  19         struct bio bio;
  20         struct bio_vec bio_vec;
  21
  22         bio_init(&bio);
  23         bio.bi_max_vecs = 1;
  24         bio.bi_io_vec = &bio_vec;
  25         bio_vec.bv_page = page;
  26         bio_vec.bv_len = PAGE_SIZE;
  27         bio_vec.bv_offset = 0;
  28         bio.bi_vcnt = 1;
  29         bio.bi_bdev = bdev;
  30         bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
  31         bio.bi_iter.bi_size = PAGE_SIZE;
  32
  33         return submit_bio_wait(rw, &bio);
  34 }
  35
  36 static int bdev_readpage(void *_sb, struct page *page)
  37 {
  38         struct super_block *sb = _sb;
  39         struct block_device *bdev = logfs_super(sb)->s_bdev;
  40         int err;
  41
  42         err = sync_request(page, bdev, READ);
  43         if (err) {
  44                 ClearPageUptodate(page);
  45                 SetPageError(page);
  46         } else {
  47                 SetPageUptodate(page);
  48                 ClearPageError(page);
  49         }
  50         unlock_page(page);
  51         return err;
  52 }
  53
  54 static DECLARE_WAIT_QUEUE_HEAD(wq);
  55
  56 static void writeseg_end_io(struct bio *bio)
  57 {
  58         struct bio_vec *bvec;
  59         int i;
  60         struct super_block *sb = bio->bi_private;
  61         struct logfs_super *super = logfs_super(sb);
  62
  63         BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */
  64
  65         bio_for_each_segment_all(bvec, bio, i) {
  66                 end_page_writeback(bvec->bv_page);
  67                 page_cache_release(bvec->bv_page);
  68         }
  69         bio_put(bio);
  70         if (atomic_dec_and_test(&super->s_pending_writes))
  71                 wake_up(&wq);
  72 }
  73
  74 static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
  75                 size_t nr_pages)
  76 {
  77         struct logfs_super *super = logfs_super(sb);
  78         struct address_space *mapping = super->s_mapping_inode->i_mapping;
  79         struct bio *bio;
  80         struct page *page;
  81         unsigned int max_pages;
  82         int i;
  83
  84         max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES);
  85
  86         bio = bio_alloc(GFP_NOFS, max_pages);
  87         BUG_ON(!bio);
  88
  89         for (i = 0; i < nr_pages; i++) {
  90                 if (i >= max_pages) {
  91                         /* Block layer cannot split bios :( */
  92                         bio->bi_vcnt = i;
  93                         bio->bi_iter.bi_size = i * PAGE_SIZE;
  94                         bio->bi_bdev = super->s_bdev;
  95                         bio->bi_iter.bi_sector = ofs >> 9;
  96                         bio->bi_private = sb;
  97                         bio->bi_end_io = writeseg_end_io;
  98                         atomic_inc(&super->s_pending_writes);
  99                         submit_bio(WRITE, bio);
 100
 101                         ofs += i * PAGE_SIZE;
 102                         index += i;
 103                         nr_pages -= i;
 104                         i = 0;
 105
 106                         bio = bio_alloc(GFP_NOFS, max_pages);
 107                         BUG_ON(!bio);
 108                 }
 109                 page = find_lock_page(mapping, index + i);
 110                 BUG_ON(!page);
 111                 bio->bi_io_vec[i].bv_page = page;
 112                 bio->bi_io_vec[i].bv_len = PAGE_SIZE;
 113                 bio->bi_io_vec[i].bv_offset = 0;
 114
 115                 BUG_ON(PageWriteback(page));
 116                 set_page_writeback(page);
 117                 unlock_page(page);
 118         }
 119         bio->bi_vcnt = nr_pages;
 120         bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
 121         bio->bi_bdev = super->s_bdev;
 122         bio->bi_iter.bi_sector = ofs >> 9;
 123         bio->bi_private = sb;
 124         bio->bi_end_io = writeseg_end_io;
 125         atomic_inc(&super->s_pending_writes);
 126         submit_bio(WRITE, bio);
 127         return 0;
 128 }
 129
 130 static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
 131 {
 132         struct logfs_super *super = logfs_super(sb);
 133         int head;
 134
 135         BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO);
 136
 137         if (len == 0) {
 138                 /* This can happen when the object fit perfectly into a
 139                  * segment, the segment gets written per sync and subsequently
 140                  * closed.
 141                  */
 142                 return;
 143         }
 144         head = ofs & (PAGE_SIZE - 1);
 145         if (head) {
 146                 ofs -= head;
 147                 len += head;
 148         }
 149         len = PAGE_ALIGN(len);
 150         __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
 151 }
 152
 153
 154 static void erase_end_io(struct bio *bio)
 155 {
 156         struct super_block *sb = bio->bi_private;
 157         struct logfs_super *super = logfs_super(sb);
 158
 159         BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */
 160         BUG_ON(bio->bi_vcnt == 0);
 161         bio_put(bio);
 162         if (atomic_dec_and_test(&super->s_pending_writes))
 163                 wake_up(&wq);
 164 }
 165
 166 static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
 167                 size_t nr_pages)
 168 {
 169         struct logfs_super *super = logfs_super(sb);
 170         struct bio *bio;
 171         unsigned int max_pages;
 172         int i;
 173
 174         max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES);
 175
 176         bio = bio_alloc(GFP_NOFS, max_pages);
 177         BUG_ON(!bio);
 178
 179         for (i = 0; i < nr_pages; i++) {
 180                 if (i >= max_pages) {
 181                         /* Block layer cannot split bios :( */
 182                         bio->bi_vcnt = i;
 183                         bio->bi_iter.bi_size = i * PAGE_SIZE;
 184                         bio->bi_bdev = super->s_bdev;
 185                         bio->bi_iter.bi_sector = ofs >> 9;
 186                         bio->bi_private = sb;
 187                         bio->bi_end_io = erase_end_io;
 188                         atomic_inc(&super->s_pending_writes);
 189                         submit_bio(WRITE, bio);
 190
 191                         ofs += i * PAGE_SIZE;
 192                         index += i;
 193                         nr_pages -= i;
 194                         i = 0;
 195
 196                         bio = bio_alloc(GFP_NOFS, max_pages);
 197                         BUG_ON(!bio);
 198                 }
 199                 bio->bi_io_vec[i].bv_page = super->s_erase_page;
 200                 bio->bi_io_vec[i].bv_len = PAGE_SIZE;
 201                 bio->bi_io_vec[i].bv_offset = 0;
 202         }
 203         bio->bi_vcnt = nr_pages;
 204         bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
 205         bio->bi_bdev = super->s_bdev;
 206         bio->bi_iter.bi_sector = ofs >> 9;
 207         bio->bi_private = sb;
 208         bio->bi_end_io = erase_end_io;
 209         atomic_inc(&super->s_pending_writes);
 210         submit_bio(WRITE, bio);
 211         return 0;
 212 }
 213
 214 static int bdev_erase(struct super_block *sb, loff_t to, size_t len,
 215                 int ensure_write)
 216 {
 217         struct logfs_super *super = logfs_super(sb);
 218
 219         BUG_ON(to & (PAGE_SIZE - 1));
 220         BUG_ON(len & (PAGE_SIZE - 1));
 221
 222         if (super->s_flags & LOGFS_SB_FLAG_RO)
 223                 return -EROFS;
 224
 225         if (ensure_write) {
 226                 /*
 227                  * Object store doesn't care whether erases happen or not.
 228                  * But for the journal they are required.  Otherwise a scan
 229                  * can find an old commit entry and assume it is the current
 230                  * one, travelling back in time.
 231                  */
 232                 do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT);
 233         }
 234
 235         return 0;
 236 }
 237
 238 static void bdev_sync(struct super_block *sb)
 239 {
 240         struct logfs_super *super = logfs_super(sb);
 241
 242         wait_event(wq, atomic_read(&super->s_pending_writes) == 0);
 243 }
 244
 245 static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs)
 246 {
 247         struct logfs_super *super = logfs_super(sb);
 248         struct address_space *mapping = super->s_mapping_inode->i_mapping;
 249         filler_t *filler = bdev_readpage;
 250
 251         *ofs = 0;
 252         return read_cache_page(mapping, 0, filler, sb);
 253 }
 254
 255 static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs)
 256 {
 257         struct logfs_super *super = logfs_super(sb);
 258         struct address_space *mapping = super->s_mapping_inode->i_mapping;
 259         filler_t *filler = bdev_readpage;
 260         u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000;
 261         pgoff_t index = pos >> PAGE_SHIFT;
 262
 263         *ofs = pos;
 264         return read_cache_page(mapping, index, filler, sb);
 265 }
 266
 267 static int bdev_write_sb(struct super_block *sb, struct page *page)
 268 {
 269         struct block_device *bdev = logfs_super(sb)->s_bdev;
 270
 271         /* Nothing special to do for block devices. */
 272         return sync_request(page, bdev, WRITE);
 273 }
 274
 275 static void bdev_put_device(struct logfs_super *s)
 276 {
 277         blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 278 }
 279
 280 static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
 281 {
 282         return 0;
 283 }
 284
 285 static const struct logfs_device_ops bd_devops = {
 286         .find_first_sb  = bdev_find_first_sb,
 287         .find_last_sb   = bdev_find_last_sb,
 288         .write_sb       = bdev_write_sb,
 289         .readpage       = bdev_readpage,
 290         .writeseg       = bdev_writeseg,
 291         .erase          = bdev_erase,
 292         .can_write_buf  = bdev_can_write_buf,
 293         .sync           = bdev_sync,
 294         .put_device     = bdev_put_device,
 295 };
 296
 297 int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
 298                 const char *devname)
 299 {
 300         struct block_device *bdev;
 301
 302         bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 303                                   type);
 304         if (IS_ERR(bdev))
 305                 return PTR_ERR(bdev);
 306
 307         if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
 308                 int mtdnr = MINOR(bdev->bd_dev);
 309                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 310                 return logfs_get_sb_mtd(p, mtdnr);
 311         }
 312
 313         p->s_bdev = bdev;
 314         p->s_mtd = NULL;
 315         p->s_devops = &bd_devops;
 316         return 0;
 317 }