* ->tree_lock (page_remove_rmap->set_page_dirty)
* bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
* ->inode->i_lock (page_remove_rmap->set_page_dirty)
+ * ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat)
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->__set_page_dirty_buffers)
/*
* Delete a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold the mapping's tree_lock.
+ * is safe. The caller must hold the mapping's tree_lock and
+ * mem_cgroup_begin_page_stat().
*/
-void __delete_from_page_cache(struct page *page, void *shadow)
+void __delete_from_page_cache(struct page *page, void *shadow,
+ struct mem_cgroup *memcg)
{
struct address_space *mapping = page->mapping;
page->mapping = NULL;
/* Leave page->index set: truncation lookup relies upon it */
- __dec_zone_page_state(page, NR_FILE_PAGES);
+ /* hugetlb pages do not participate in page cache accounting. */
+ if (!PageHuge(page))
+ __dec_zone_page_state(page, NR_FILE_PAGES);
if (PageSwapBacked(page))
__dec_zone_page_state(page, NR_SHMEM);
BUG_ON(page_mapped(page));
* anyway will be cleared before returning page into buddy allocator.
*/
if (WARN_ON_ONCE(PageDirty(page)))
- account_page_cleaned(page, mapping);
+ account_page_cleaned(page, mapping, memcg,
+ inode_to_wb(mapping->host));
}
/**
void delete_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
+ struct mem_cgroup *memcg;
+ unsigned long flags;
+
void (*freepage)(struct page *);
BUG_ON(!PageLocked(page));
freepage = mapping->a_ops->freepage;
- spin_lock_irq(&mapping->tree_lock);
- __delete_from_page_cache(page, NULL);
- spin_unlock_irq(&mapping->tree_lock);
+
+ memcg = mem_cgroup_begin_page_stat(page);
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ __delete_from_page_cache(page, NULL, memcg);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ mem_cgroup_end_page_stat(memcg);
if (freepage)
freepage(page);
if (!mapping_cap_writeback_dirty(mapping))
return 0;
+ wbc_attach_fdatawrite_inode(&wbc, mapping->host);
ret = do_writepages(mapping, &wbc);
+ wbc_detach_inode(&wbc);
return ret;
}
}
EXPORT_SYMBOL(filemap_flush);
-/**
- * filemap_fdatawait_range - wait for writeback to complete
- * @mapping: address space structure to wait for
- * @start_byte: offset in bytes where the range starts
- * @end_byte: offset in bytes where the range ends (inclusive)
- *
- * Walk the list of under-writeback pages of the given address space
- * in the given range and wait for all of them.
- */
-int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
- loff_t end_byte)
+static int __filemap_fdatawait_range(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
{
pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
struct pagevec pvec;
int nr_pages;
- int ret2, ret = 0;
+ int ret = 0;
if (end_byte < start_byte)
goto out;
cond_resched();
}
out:
+ return ret;
+}
+
+/**
+ * filemap_fdatawait_range - wait for writeback to complete
+ * @mapping: address space structure to wait for
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Walk the list of under-writeback pages of the given address space
+ * in the given range and wait for all of them. Check error status of
+ * the address space and return it.
+ *
+ * Since the error status of the address space is cleared by this function,
+ * callers are responsible for checking the return value and handling and/or
+ * reporting the error.
+ */
+int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
+ loff_t end_byte)
+{
+ int ret, ret2;
+
+ ret = __filemap_fdatawait_range(mapping, start_byte, end_byte);
ret2 = filemap_check_errors(mapping);
if (!ret)
ret = ret2;
}
EXPORT_SYMBOL(filemap_fdatawait_range);
+/**
+ * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
+ * @mapping: address space structure to wait for
+ *
+ * Walk the list of under-writeback pages of the given address space
+ * and wait for all of them. Unlike filemap_fdatawait(), this function
+ * does not clear error status of the address space.
+ *
+ * Use this function if callers don't handle errors themselves. Expected
+ * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
+ * fsfreeze(8)
+ */
+void filemap_fdatawait_keep_errors(struct address_space *mapping)
+{
+ loff_t i_size = i_size_read(mapping->host);
+
+ if (i_size == 0)
+ return;
+
+ __filemap_fdatawait_range(mapping, 0, i_size - 1);
+}
+
/**
* filemap_fdatawait - wait for all under-writeback pages to complete
* @mapping: address space structure to wait for
*
* Walk the list of under-writeback pages of the given address space
- * and wait for all of them.
+ * and wait for all of them. Check error status of the address space
+ * and return it.
+ *
+ * Since the error status of the address space is cleared by this function,
+ * callers are responsible for checking the return value and handling and/or
+ * reporting the error.
*/
int filemap_fdatawait(struct address_space *mapping)
{
if (!error) {
struct address_space *mapping = old->mapping;
void (*freepage)(struct page *);
+ struct mem_cgroup *memcg;
+ unsigned long flags;
pgoff_t offset = old->index;
freepage = mapping->a_ops->freepage;
new->mapping = mapping;
new->index = offset;
- spin_lock_irq(&mapping->tree_lock);
- __delete_from_page_cache(old, NULL);
+ memcg = mem_cgroup_begin_page_stat(old);
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ __delete_from_page_cache(old, NULL, memcg);
error = radix_tree_insert(&mapping->page_tree, offset, new);
BUG_ON(error);
mapping->nrpages++;
- __inc_zone_page_state(new, NR_FILE_PAGES);
+
+ /*
+ * hugetlb pages do not participate in page cache accounting.
+ */
+ if (!PageHuge(new))
+ __inc_zone_page_state(new, NR_FILE_PAGES);
if (PageSwapBacked(new))
__inc_zone_page_state(new, NR_SHMEM);
- spin_unlock_irq(&mapping->tree_lock);
- mem_cgroup_migrate(old, new, true);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ mem_cgroup_end_page_stat(memcg);
+ mem_cgroup_replace_page(old, new);
radix_tree_preload_end();
if (freepage)
freepage(old);
radix_tree_preload_end();
if (unlikely(error))
goto err_insert;
- __inc_zone_page_state(page, NR_FILE_PAGES);
+
+ /* hugetlb pages do not participate in page cache accounting. */
+ if (!huge)
+ __inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
if (!huge)
mem_cgroup_commit_charge(page, memcg, false);
do {
cpuset_mems_cookie = read_mems_allowed_begin();
n = cpuset_mem_spread_node();
- page = alloc_pages_exact_node(n, gfp, 0);
+ page = __alloc_pages_node(n, gfp, 0);
} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
return page;
error = -ENOMEM;
goto out;
}
- error = add_to_page_cache_lru(page, mapping,
- index, GFP_KERNEL);
+ error = add_to_page_cache_lru(page, mapping, index,
+ mapping_gfp_constraint(mapping, GFP_KERNEL));
if (error) {
page_cache_release(page);
if (error == -EEXIST) {
if (!page)
return -ENOMEM;
- ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
+ ret = add_to_page_cache_lru(page, mapping, offset,
+ mapping_gfp_constraint(mapping, GFP_KERNEL));
if (ret == 0)
ret = mapping->a_ops->readpage(file, page);
else if (ret == -EEXIST)
struct file *file,
pgoff_t offset)
{
- unsigned long ra_pages;
struct address_space *mapping = file->f_mapping;
/* If we don't want any read-ahead, don't bother */
/*
* mmap read-around
*/
- ra_pages = max_sane_readahead(ra->ra_pages);
- ra->start = max_t(long, 0, offset - ra_pages / 2);
- ra->size = ra_pages;
- ra->async_size = ra_pages / 4;
+ ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
+ ra->size = ra->ra_pages;
+ ra->async_size = ra->ra_pages / 4;
ra_submit(ra, mapping, file);
}
break;
}
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
+
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
if (unlikely(status < 0))
written += copied;
balance_dirty_pages_ratelimited(mapping);
- if (fatal_signal_pending(current)) {
- status = -EINTR;
- break;
- }
} while (iov_iter_count(i));
return written ? written : status;
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
- err = file_remove_suid(file);
+ err = file_remove_privs(file);
if (err)
goto out;
* page is known to the local caching routines.
*
* The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
+ * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
*
*/
int try_to_release_page(struct page *page, gfp_t gfp_mask)