/* Storage object read/write
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */

#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/swap.h>
#include "internal.h"

/*
 * detect wake up events generated by the unlocking of pages in which we're
 * interested
 * - we use this to detect read completion of backing pages
 * - the caller holds the waitqueue lock
 */
static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
                                  int sync, void *_key)
{
        struct cachefiles_one_read *monitor =
                container_of(wait, struct cachefiles_one_read, monitor);
        struct cachefiles_object *object;
        struct wait_bit_key *key = _key;
        struct page *page = wait->private;

        ASSERT(key);

        _enter("{%lu},%u,%d,{%p,%u}",
               monitor->netfs_page->index, mode, sync,
               key->flags, key->bit_nr);

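        /* only the unlock event on the page we are monitoring is of
         * interest; anything else is ignored and this entry stays on the
         * waitqueue */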
        if (key->flags != &page->flags ||
            key->bit_nr != PG_locked)
                return 0;

        _debug("--- monitor %p %lx ---", page, page->flags);

        if (!PageUptodate(page) && !PageError(page)) {
                /* unlocked, not uptodate and not erroneous? */
                _debug("page probably truncated");
        }

        /* remove from the waitqueue */
        list_del(&wait->task_list);

        /* move onto the action list and queue for FS-Cache thread pool */
        ASSERT(monitor->op);

        object = container_of(monitor->op->op.object,
                              struct cachefiles_object, fscache);

        spin_lock(&object->work_lock);
        list_add_tail(&monitor->op_link, &monitor->op->to_do);
        spin_unlock(&object->work_lock);

        fscache_enqueue_retrieval(monitor->op);
        return 0;
}

/*
 * handle a probably truncated page
 * - check to see if the page is still relevant and reissue the read if
 *   possible
 * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we
 *   must wait again and 0 if successful
 */
static int cachefiles_read_reissue(struct cachefiles_object *object,
                                   struct cachefiles_one_read *monitor)
{
        struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
        struct page *backpage = monitor->back_page, *backpage2;
        int ret;

        _enter("{ino=%lx},{%lx,%lx}",
               d_backing_inode(object->backer)->i_ino,
               backpage->index, backpage->flags);

        /* skip if the page was truncated away completely */
        if (backpage->mapping != bmapping) {
                _leave(" = -ENODATA [mapping]");
                return -ENODATA;
        }

        backpage2 = find_get_page(bmapping, backpage->index);
        if (!backpage2) {
                _leave(" = -ENODATA [gone]");
                return -ENODATA;
        }

        if (backpage != backpage2) {
                put_page(backpage2);
                _leave(" = -ENODATA [different]");
                return -ENODATA;
        }

        /* the page is still there and we already have a ref on it, so we don't
         * need a second */
        put_page(backpage2);

        INIT_LIST_HEAD(&monitor->op_link);
        add_page_wait_queue(backpage, &monitor->monitor);

        if (trylock_page(backpage)) {
                ret = -EIO;
                if (PageError(backpage))
                        goto unlock_discard;
                ret = 0;
                if (PageUptodate(backpage))
                        goto unlock_discard;

                _debug("reissue read");
                ret = bmapping->a_ops->readpage(NULL, backpage);
                if (ret < 0)
                        goto unlock_discard;
        }

        /* but the page may have been read before the monitor was installed, so
         * the monitor may miss the event - so we have to ensure that we do get
         * one in such a case */
        if (trylock_page(backpage)) {
                _debug("jumpstart %p {%lx}", backpage, backpage->flags);
                unlock_page(backpage);
        }

        /* it'll reappear on the todo list */
        _leave(" = -EINPROGRESS");
        return -EINPROGRESS;

unlock_discard:
        unlock_page(backpage);
        spin_lock_irq(&object->work_lock);
        list_del(&monitor->op_link);
        spin_unlock_irq(&object->work_lock);
        _leave(" = %d", ret);
        return ret;
}

/*
 * copy data from backing pages to netfs pages to complete a read operation
 * - driven by FS-Cache's thread pool
 */
static void cachefiles_read_copier(struct fscache_operation *_op)
{
        struct cachefiles_one_read *monitor;
        struct cachefiles_object *object;
        struct fscache_retrieval *op;
        int error, max;

        op = container_of(_op, struct fscache_retrieval, op);
        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);

        _enter("{ino=%lu}", d_backing_inode(object->backer)->i_ino);

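        /* handle at most a small, arbitrary batch per invocation so that one
         * copier cannot monopolise an FS-Cache worker thread; anything left
         * over is requeued below */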
        max = 8;
        spin_lock_irq(&object->work_lock);

        while (!list_empty(&op->to_do)) {
                monitor = list_entry(op->to_do.next,
                                     struct cachefiles_one_read, op_link);
                list_del(&monitor->op_link);

                spin_unlock_irq(&object->work_lock);

                _debug("- copy {%lu}", monitor->back_page->index);

        recheck:
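                /* if the cookie is being invalidated, the cached data can no
                 * longer be trusted; fail the read with -ESTALE rather than
                 * copying it across */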
                if (test_bit(FSCACHE_COOKIE_INVALIDATING,
                             &object->fscache.cookie->flags)) {
                        error = -ESTALE;
                } else if (PageUptodate(monitor->back_page)) {
                        copy_highpage(monitor->netfs_page, monitor->back_page);
                        fscache_mark_page_cached(monitor->op,
                                                 monitor->netfs_page);
                        error = 0;
                } else if (!PageError(monitor->back_page)) {
                        /* the page has probably been truncated */
                        error = cachefiles_read_reissue(object, monitor);
                        if (error == -EINPROGRESS)
                                goto next;
                        goto recheck;
                } else {
                        cachefiles_io_error_obj(
                                object,
                                "Readpage failed on backing file %lx",
                                (unsigned long) monitor->back_page->flags);
                        error = -EIO;
                }

                page_cache_release(monitor->back_page);

                fscache_end_io(op, monitor->netfs_page, error);
                page_cache_release(monitor->netfs_page);
                fscache_retrieval_complete(op, 1);
                fscache_put_retrieval(op);
                kfree(monitor);

        next:
                /* let the thread pool have some air occasionally */
                max--;
                if (max < 0 || need_resched()) {
                        if (!list_empty(&op->to_do))
                                fscache_enqueue_retrieval(op);
                        _leave(" [maxed out]");
                        return;
                }

                spin_lock_irq(&object->work_lock);
        }

        spin_unlock_irq(&object->work_lock);
        _leave("");
}

/*
 * read the corresponding page to the given set from the backing file
 * - an uncertain page is simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
                                            struct fscache_retrieval *op,
                                            struct page *netpage)
{
        struct cachefiles_one_read *monitor;
        struct address_space *bmapping;
        struct page *newpage, *backpage;
        int ret;

        _enter("");

        _debug("read back %p{%lu,%d}",
               netpage, netpage->index, page_count(netpage));

        monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
        if (!monitor)
                goto nomem;

        monitor->netfs_page = netpage;
        monitor->op = fscache_get_retrieval(op);

        init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);

        /* attempt to get hold of the backing page */
        bmapping = d_backing_inode(object->backer)->i_mapping;
        newpage = NULL;

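        /* someone else may install a page at this index between a failed
         * lookup and our insertion; add_to_page_cache_lru() then returns
         * -EEXIST and the lookup is simply retried */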
        for (;;) {
                backpage = find_get_page(bmapping, netpage->index);
                if (backpage)
                        goto backing_page_already_present;

                if (!newpage) {
                        newpage = __page_cache_alloc(cachefiles_gfp |
                                                     __GFP_COLD);
                        if (!newpage)
                                goto nomem_monitor;
                }

                ret = add_to_page_cache_lru(newpage, bmapping,
                                            netpage->index, cachefiles_gfp);
                if (ret == 0)
                        goto installed_new_backing_page;
                if (ret != -EEXIST)
                        goto nomem_page;
        }

        /* we've installed a new backing page, so now we need to start
         * it reading */
installed_new_backing_page:
        _debug("- new %p", newpage);

        backpage = newpage;
        newpage = NULL;

read_backing_page:
        ret = bmapping->a_ops->readpage(NULL, backpage);
        if (ret < 0)
                goto read_error;

        /* set the monitor to transfer the data across */
monitor_backing_page:
        _debug("- monitor add");

        /* install the monitor */
        page_cache_get(monitor->netfs_page);
        page_cache_get(backpage);
        monitor->back_page = backpage;
        monitor->monitor.private = backpage;
        add_page_wait_queue(backpage, &monitor->monitor);
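        /* the refs taken above now belong to the monitor, which the read
         * completion path (cachefiles_read_waiter/_copier) will consume, so
         * forget it here to keep the out path from disposing of it */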
        monitor = NULL;

        /* but the page may have been read before the monitor was installed, so
         * the monitor may miss the event - so we have to ensure that we do get
         * one in such a case */
        if (trylock_page(backpage)) {
                _debug("jumpstart %p {%lx}", backpage, backpage->flags);
                unlock_page(backpage);
        }
        goto success;

        /* if the backing page is already present, it can be in one of
         * three states: read in progress, read failed or read okay */
backing_page_already_present:
        _debug("- present");

        if (newpage) {
                page_cache_release(newpage);
                newpage = NULL;
        }

        if (PageError(backpage))
                goto io_error;

        if (PageUptodate(backpage))
                goto backing_page_already_uptodate;

        if (!trylock_page(backpage))
                goto monitor_backing_page;
        _debug("read %p {%lx}", backpage, backpage->flags);
        goto read_backing_page;

        /* the backing page is already up to date, attach the netfs
         * page to the pagecache and LRU and copy the data across */
backing_page_already_uptodate:
        _debug("- uptodate");

        fscache_mark_page_cached(op, netpage);

        copy_highpage(netpage, backpage);
        fscache_end_io(op, netpage, 0);
        fscache_retrieval_complete(op, 1);

success:
        _debug("success");
        ret = 0;

out:
        if (backpage)
                page_cache_release(backpage);
        if (monitor) {
                fscache_put_retrieval(monitor->op);
                kfree(monitor);
        }
        _leave(" = %d", ret);
        return ret;

read_error:
        _debug("read error %d", ret);
        if (ret == -ENOMEM) {
                fscache_retrieval_complete(op, 1);
                goto out;
        }
io_error:
        cachefiles_io_error_obj(object, "Page read error on backing file");
        fscache_retrieval_complete(op, 1);
        ret = -ENOBUFS;
        goto out;

nomem_page:
        page_cache_release(newpage);
nomem_monitor:
        fscache_put_retrieval(monitor->op);
        kfree(monitor);
nomem:
        fscache_retrieval_complete(op, 1);
        _leave(" = -ENOMEM");
        return -ENOMEM;
}

/*
 * read a page from the cache or allocate a block in which to store it
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - if the page is backed by a block in the cache:
 *   - a read will be started which will call the callback on completion
 *   - 0 will be returned
 * - else if the page is unbacked:
 *   - the metadata will be retained
 *   - -ENODATA will be returned
 */
int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
                                  struct page *page,
                                  gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct inode *inode;
        sector_t block0, block;
        unsigned shift;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("{%p},{%lx},,,", object, page->index);

        if (!object->backer)
                goto enobufs;

        inode = d_backing_inode(object->backer);
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);

        /* calculate the shift required to use bmap */
        if (inode->i_sb->s_blocksize > PAGE_SIZE)
                goto enobufs;

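        /* e.g. with 4KiB pages on a 1KiB-block filesystem:
         * shift = 12 - 10 = 2, so page index N maps to backing block N << 2 */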
        shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;

        op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
        op->op.flags |= FSCACHE_OP_ASYNC;
        op->op.processor = cachefiles_read_copier;

        /* we assume the absence or presence of the first block is a good
         * enough indication for the page as a whole
         * - TODO: don't use bmap() for this as it is _not_ actually good
         *   enough, since it doesn't indicate errors, but it's all we've got
         *   for the moment
         */
        block0 = page->index;
        block0 <<= shift;

        block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
        _debug("%llx -> %llx",
               (unsigned long long) block0,
               (unsigned long long) block);

        if (block) {
                /* submit the apparently valid page to the backing fs to be
                 * read from disk */
                ret = cachefiles_read_backing_file_one(object, op, page);
        } else if (cachefiles_has_space(cache, 0, 1) == 0) {
                /* there's space in the cache we can use */
                fscache_mark_page_cached(op, page);
                fscache_retrieval_complete(op, 1);
                ret = -ENODATA;
        } else {
                goto enobufs;
        }

        _leave(" = %d", ret);
        return ret;

enobufs:
        fscache_retrieval_complete(op, 1);
        _leave(" = -ENOBUFS");
        return -ENOBUFS;
}

/*
 * read the corresponding pages to the given set from the backing file
 * - any uncertain pages are simply discarded, to be tried again another time
 */
static int cachefiles_read_backing_file(struct cachefiles_object *object,
                                        struct fscache_retrieval *op,
                                        struct list_head *list)
{
        struct cachefiles_one_read *monitor = NULL;
        struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
        struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
        int ret = 0;

        _enter("");

        list_for_each_entry_safe(netpage, _n, list, lru) {
                list_del(&netpage->lru);

                _debug("read back %p{%lu,%d}",
                       netpage, netpage->index, page_count(netpage));

                if (!monitor) {
                        monitor = kzalloc(sizeof(*monitor), cachefiles_gfp);
                        if (!monitor)
                                goto nomem;

                        monitor->op = fscache_get_retrieval(op);
                        init_waitqueue_func_entry(&monitor->monitor,
                                                  cachefiles_read_waiter);
                }

                for (;;) {
                        backpage = find_get_page(bmapping, netpage->index);
                        if (backpage)
                                goto backing_page_already_present;

                        if (!newpage) {
                                newpage = __page_cache_alloc(cachefiles_gfp |
                                                             __GFP_COLD);
                                if (!newpage)
                                        goto nomem;
                        }

                        ret = add_to_page_cache_lru(newpage, bmapping,
                                                    netpage->index,
                                                    cachefiles_gfp);
                        if (ret == 0)
                                goto installed_new_backing_page;
                        if (ret != -EEXIST)
                                goto nomem;
                }

                /* we've installed a new backing page, so now we need
                 * to start it reading */
        installed_new_backing_page:
                _debug("- new %p", newpage);

                backpage = newpage;
                newpage = NULL;

        reread_backing_page:
                ret = bmapping->a_ops->readpage(NULL, backpage);
                if (ret < 0)
                        goto read_error;

                /* add the netfs page to the pagecache and LRU, and set the
                 * monitor to transfer the data across */
        monitor_backing_page:
                _debug("- monitor add");

                ret = add_to_page_cache_lru(netpage, op->mapping,
                                            netpage->index, cachefiles_gfp);
                if (ret < 0) {
                        if (ret == -EEXIST) {
                                page_cache_release(netpage);
                                fscache_retrieval_complete(op, 1);
                                continue;
                        }
                        goto nomem;
                }

                /* install a monitor */
                page_cache_get(netpage);
                monitor->netfs_page = netpage;

                page_cache_get(backpage);
                monitor->back_page = backpage;
                monitor->monitor.private = backpage;
                add_page_wait_queue(backpage, &monitor->monitor);
                monitor = NULL;

                /* but the page may have been read before the monitor was
                 * installed, so the monitor may miss the event - so we have to
                 * ensure that we do get one in such a case */
                if (trylock_page(backpage)) {
                        _debug("2unlock %p {%lx}", backpage, backpage->flags);
                        unlock_page(backpage);
                }

                page_cache_release(backpage);
                backpage = NULL;

                page_cache_release(netpage);
                netpage = NULL;
                continue;

                /* if the backing page is already present, it can be in one of
                 * three states: read in progress, read failed or read okay */
        backing_page_already_present:
                _debug("- present %p", backpage);

                if (PageError(backpage))
                        goto io_error;

                if (PageUptodate(backpage))
                        goto backing_page_already_uptodate;

                _debug("- not ready %p{%lx}", backpage, backpage->flags);

                if (!trylock_page(backpage))
                        goto monitor_backing_page;

                if (PageError(backpage)) {
                        _debug("error %lx", backpage->flags);
                        unlock_page(backpage);
                        goto io_error;
                }

                if (PageUptodate(backpage))
                        goto backing_page_already_uptodate_unlock;

                /* we've locked a page that's neither up to date nor erroneous,
                 * so we need to attempt to read it again */
                goto reread_backing_page;

                /* the backing page is already up to date, attach the netfs
                 * page to the pagecache and LRU and copy the data across */
        backing_page_already_uptodate_unlock:
                _debug("uptodate %lx", backpage->flags);
                unlock_page(backpage);
        backing_page_already_uptodate:
                _debug("- uptodate");

                ret = add_to_page_cache_lru(netpage, op->mapping,
                                            netpage->index, cachefiles_gfp);
                if (ret < 0) {
                        if (ret == -EEXIST) {
                                page_cache_release(netpage);
                                fscache_retrieval_complete(op, 1);
                                continue;
                        }
                        goto nomem;
                }

                copy_highpage(netpage, backpage);

                page_cache_release(backpage);
                backpage = NULL;

                fscache_mark_page_cached(op, netpage);

                /* the netpage is unlocked and marked up to date here */
                fscache_end_io(op, netpage, 0);
                page_cache_release(netpage);
                netpage = NULL;
                fscache_retrieval_complete(op, 1);
                continue;
        }

        netpage = NULL;

        _debug("out");

out:
        /* tidy up */
        if (newpage)
                page_cache_release(newpage);
        if (netpage)
                page_cache_release(netpage);
        if (backpage)
                page_cache_release(backpage);
        if (monitor) {
                fscache_put_retrieval(op);
                kfree(monitor);
        }

        list_for_each_entry_safe(netpage, _n, list, lru) {
                list_del(&netpage->lru);
                page_cache_release(netpage);
                fscache_retrieval_complete(op, 1);
        }

        _leave(" = %d", ret);
        return ret;

nomem:
        _debug("nomem");
        ret = -ENOMEM;
        goto record_page_complete;

read_error:
        _debug("read error %d", ret);
        if (ret == -ENOMEM)
                goto record_page_complete;
io_error:
        cachefiles_io_error_obj(object, "Page read error on backing file");
        ret = -ENOBUFS;
record_page_complete:
        fscache_retrieval_complete(op, 1);
        goto out;
}

/*
 * read a list of pages from the cache or allocate blocks in which to store
 * them
 */
int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
                                   struct list_head *pages,
                                   unsigned *nr_pages,
                                   gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct list_head backpages;
        struct pagevec pagevec;
        struct inode *inode;
        struct page *page, *_n;
        unsigned shift, nrbackpages;
        int ret, ret2, space;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("{OBJ%x,%d},,%d,,",
               object->fscache.debug_id, atomic_read(&op->op.usage),
               *nr_pages);

        if (!object->backer)
                goto all_enobufs;

        space = 1;
        if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
                space = 0;

        inode = d_backing_inode(object->backer);
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);

        /* calculate the shift required to use bmap */
        if (inode->i_sb->s_blocksize > PAGE_SIZE)
                goto all_enobufs;

        shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;

        pagevec_init(&pagevec, 0);

        op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
        op->op.flags |= FSCACHE_OP_ASYNC;
        op->op.processor = cachefiles_read_copier;

        INIT_LIST_HEAD(&backpages);
        nrbackpages = 0;

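        /* default result for pages found to have no backing block: -ENODATA
         * if there's cache space to reserve for them, -ENOBUFS otherwise */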
        ret = space ? -ENODATA : -ENOBUFS;
        list_for_each_entry_safe(page, _n, pages, lru) {
                sector_t block0, block;

                /* we assume the absence or presence of the first block is a
                 * good enough indication for the page as a whole
                 * - TODO: don't use bmap() for this as it is _not_ actually
                 *   good enough, since it doesn't indicate errors, but it's
                 *   all we've got for the moment
                 */
                block0 = page->index;
                block0 <<= shift;

                block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
                                                      block0);
                _debug("%llx -> %llx",
                       (unsigned long long) block0,
                       (unsigned long long) block);

                if (block) {
                        /* we have data - add it to the list to give to the
                         * backing fs */
                        list_move(&page->lru, &backpages);
                        (*nr_pages)--;
                        nrbackpages++;
                } else if (space && pagevec_add(&pagevec, page) == 0) {
                        fscache_mark_pages_cached(op, &pagevec);
                        fscache_retrieval_complete(op, 1);
                        ret = -ENODATA;
                } else {
                        fscache_retrieval_complete(op, 1);
                }
        }

        if (pagevec_count(&pagevec) > 0)
                fscache_mark_pages_cached(op, &pagevec);

        if (list_empty(pages))
                ret = 0;

        /* submit the apparently valid pages to the backing fs to be read from
         * disk */
        if (nrbackpages > 0) {
                ret2 = cachefiles_read_backing_file(object, op, &backpages);
                if (ret2 == -ENOMEM || ret2 == -EINTR)
                        ret = ret2;
        }

        _leave(" = %d [nr=%u%s]",
               ret, *nr_pages, list_empty(pages) ? " empty" : "");
        return ret;

all_enobufs:
        fscache_retrieval_complete(op, *nr_pages);
        return -ENOBUFS;
}

/*
 * allocate a block in the cache in which to store a page
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if out of memory
 * - returns -ENOBUFS if no buffers can be made available
 * - returns -ENOBUFS if page is beyond EOF
 * - otherwise:
 *   - the metadata will be retained
 *   - 0 will be returned
 */
int cachefiles_allocate_page(struct fscache_retrieval *op,
                             struct page *page,
                             gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,{%lx},", object, page->index);

        ret = cachefiles_has_space(cache, 0, 1);
        if (ret == 0)
                fscache_mark_page_cached(op, page);
        else
                ret = -ENOBUFS;

        fscache_retrieval_complete(op, 1);
        _leave(" = %d", ret);
        return ret;
}

/*
 * allocate blocks in the cache in which to store a set of pages
 * - cache withdrawal is prevented by the caller
 * - returns -EINTR if interrupted
 * - returns -ENOMEM if out of memory
 * - returns -ENOBUFS if some buffers couldn't be made available
 * - returns -ENOBUFS if some pages are beyond EOF
 * - otherwise:
 *   - -ENODATA will be returned
 * - metadata will be retained for any page marked
 */
int cachefiles_allocate_pages(struct fscache_retrieval *op,
                              struct list_head *pages,
                              unsigned *nr_pages,
                              gfp_t gfp)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct pagevec pagevec;
        struct page *page;
        int ret;

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,,,%d,", object, *nr_pages);

        ret = cachefiles_has_space(cache, 0, *nr_pages);
        if (ret == 0) {
                pagevec_init(&pagevec, 0);

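                /* pagevec_add() returns the slots remaining; 0 means the
                 * vector is now full, so flush the batch and keep refilling */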
                list_for_each_entry(page, pages, lru) {
                        if (pagevec_add(&pagevec, page) == 0)
                                fscache_mark_pages_cached(op, &pagevec);
                }

                if (pagevec_count(&pagevec) > 0)
                        fscache_mark_pages_cached(op, &pagevec);
                ret = -ENODATA;
        } else {
                ret = -ENOBUFS;
        }

        fscache_retrieval_complete(op, *nr_pages);
        _leave(" = %d", ret);
        return ret;
}

/*
 * request a page be stored in the cache
 * - cache withdrawal is prevented by the caller
 * - this request may be ignored if there's no cache block available, in which
 *   case -ENOBUFS will be returned
 * - if the op is in progress, 0 will be returned
 */
int cachefiles_write_page(struct fscache_storage *op, struct page *page)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;
        struct file *file;
        struct path path;
        loff_t pos, eof;
        size_t len;
        void *data;
        int ret;

        ASSERT(op != NULL);
        ASSERT(page != NULL);

        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);

        _enter("%p,%p{%lx},,,", object, page, page->index);

        if (!object->backer) {
                _leave(" = -ENOBUFS");
                return -ENOBUFS;
        }

        ASSERT(d_is_reg(object->backer));

        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        /* write the page to the backing filesystem and let it store it in its
         * own time */
        path.mnt = cache->mnt;
        path.dentry = object->backer;
        file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
        } else {
                pos = (loff_t) page->index << PAGE_SHIFT;

                /* we mustn't write more data than we have, so we have
                 * to beware of a partial page at EOF */
                eof = object->fscache.store_limit_l;
                len = PAGE_SIZE;
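                /* e.g. with 4KiB pages, eof 0x1800 and pos 0x1000 trim len
                 * from 0x1000 down to 0x800 */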
                if (eof & ~PAGE_MASK) {
                        ASSERTCMP(pos, <, eof);
                        if (eof - pos < PAGE_SIZE) {
                                _debug("cut short %llx to %llx",
                                       pos, eof);
                                len = eof - pos;
                                ASSERTCMP(pos + len, ==, eof);
                        }
                }

                data = kmap(page);
                ret = __kernel_write(file, data, len, &pos);
                kunmap(page);
                if (ret != len)
                        ret = -EIO;
                fput(file);
        }

        if (ret < 0) {
                if (ret == -EIO)
                        cachefiles_io_error_obj(
                                object, "Write page to backing file failed");
                ret = -ENOBUFS;
        }

        _leave(" = %d", ret);
        return ret;
}

/*
 * detach a backing block from a page
 * - cache withdrawal is prevented by the caller
 */
void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
{
        struct cachefiles_object *object;
        struct cachefiles_cache *cache;

        object = container_of(_object, struct cachefiles_object, fscache);
        cache = container_of(object->fscache.cache,
                             struct cachefiles_cache, cache);

        _enter("%p,{%lu}", object, page->index);

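        /* the caller (__fscache_uncache_page()) holds cookie->lock and
         * expects this backend op to release it, hence the bare unlock */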
        spin_unlock(&object->fscache.cookie->lock);
}