Rewrite the buffer layer.
---
fs/Makefile | 2
fs/buffer.c | 31
fs/fs-writeback.c | 13
fs/fsblock.c | 2511 ++++++++++++++++++++++++++++++++++++++++++
fs/inode.c | 37
fs/splice.c | 3
include/linux/buffer_head.h | 1
include/linux/fsblock.h | 347 +++++
include/linux/fsblock_types.h | 70 +
include/linux/page-flags.h | 15
init/main.c | 2
mm/filemap.c | 7
mm/page_alloc.c | 3
mm/swap.c | 7
mm/truncate.c | 93 -
mm/vmscan.c | 6
16 files changed, 3077 insertions(+), 71 deletions(-)
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -90,6 +90,8 @@
#define PG_reclaim 17 /* To be reclaimed asap */
#define PG_buddy 19 /* Page is free, on buddy lists */
+#define PG_blocks 20 /* Page has block mappings */
+
/* PG_owner_priv_1 users should have descriptive aliases */
#define PG_checked PG_owner_priv_1 /* Used by some filesystems */
@@ -134,8 +136,17 @@ static inline void SetPageUptodate(struc
if (!test_and_set_bit(PG_uptodate, &page->flags))
page_clear_dirty(page);
}
+/*
+ * Set PG_uptodate and report its previous state: returns 0 if this call
+ * set the bit (the page is then also passed to page_clear_dirty(), as
+ * SetPageUptodate() does), 1 if the bit was already set.
+ */
+static inline int TestSetPageUptodate(struct page *page)
+{
+	if (!test_and_set_bit(PG_uptodate, &page->flags)) {
+		page_clear_dirty(page);
+		return 0;
+	}
+	return 1;
+}
#else
#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags)
+#define TestSetPageUptodate(page) test_and_set_bit(PG_uptodate, &(page)->flags)
#endif
#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
@@ -217,6 +228,10 @@ static inline void SetPageUptodate(struc
#define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags)
#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags)
+#define PageBlocks(page) test_bit(PG_blocks, &(page)->flags) /* test PG_blocks ("page has block mappings") */
+#define SetPageBlocks(page) set_bit(PG_blocks, &(page)->flags) /* set PG_blocks */
+#define ClearPageBlocks(page) clear_bit(PG_blocks, &(page)->flags) /* clear PG_blocks */
+
#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
Index: linux-2.6/fs/Makefile
===================================================================
--- linux-2.6.orig/fs/Makefile
+++ linux-2.6/fs/Makefile
@@ -14,7 +14,7 @@ obj-y := open.o read_write.o file_table.
stack.o
ifeq ($(CONFIG_BLOCK),y)
-obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+obj-y += fsblock.o buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
else
obj-y += no-block.o
endif
Index: linux-2.6/fs/fsblock.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/fsblock.c
@@ -0,0 +1,2511 @@
+/*
+ * fs/fsblock.c
+ *
+ * Copyright (C) 2007 Nick Piggin, SuSE Labs, Novell Inc.
+ */
+
+#include <linux/fsblock.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/bitops.h>
+#include <linux/pagevec.h>
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/rcupdate.h> /* XXX: get rid of RCU */
+#include <linux/module.h>
+#include <linux/bit_spinlock.h> /* bit_spin_lock for subpage blocks */
+#include <linux/vmalloc.h> /* vmap for superpage blocks */
+#include <linux/gfp.h>
+//#include <linux/buffer_head.h> /* too much crap in me */
+extern int try_to_free_buffers(struct page *);
+
+/* XXX: add a page / block invariant checker function? */
+
+#include <asm/atomic.h>
+
+#define SECTOR_SHIFT MIN_SECTOR_SHIFT /* alias; MIN_SECTOR_SHIFT is not defined here (presumably in <linux/fsblock.h>) */
+#define NR_SUB_SIZES (1 << (PAGE_CACHE_SHIFT - MIN_SECTOR_SHIFT)) /* 2^(page shift - min sector shift); not referenced in the visible part of this file */
+
+static struct kmem_cache *block_cache __read_mostly; /* slab of struct fsblock, created in fsblock_init() */
+static struct kmem_cache *mblock_cache __read_mostly; /* slab of struct fsblock_meta, created in fsblock_init() */
+
+static void block_ctor(void *data, struct kmem_cache *cachep,
+ unsigned long flags)
+{ /*
+ * Slab constructor shared by block_cache and mblock_cache: gives every
+ * object a zero reference count, which init_block() asserts before taking
+ * the first reference. @cachep and @flags are unused.
+ * NOTE(review): for mblock_cache objects this treats the fsblock_meta as
+ * a struct fsblock, i.e. assumes the embedded block is the first member.
+ */
+ struct fsblock *block = data;
+ atomic_set(&block->count, 0);
+}
+
+void __init fsblock_init(void)
+{ /*
+ * Create the two slab caches used for blocks that are not subpage sized:
+ * "fsblock-data" (struct fsblock) and "fsblock-metadata" (struct
+ * fsblock_meta). Both use block_ctor() and SLAB_DESTROY_BY_RCU. The
+ * return values are not checked here; SLAB_PANIC is passed instead.
+ */
+ block_cache = kmem_cache_create("fsblock-data",
+ sizeof(struct fsblock), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_DESTROY_BY_RCU,
+ block_ctor, NULL);
+
+ mblock_cache = kmem_cache_create("fsblock-metadata",
+ sizeof(struct fsblock_meta), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_DESTROY_BY_RCU,
+ block_ctor, NULL);
+}
+
+static void init_block(struct page *page, struct fsblock *block, unsigned int bits)
+{ /*
+ * Put a freshly allocated block into its initial state: no flags, block
+ * number -1 (unmapped), pointing at @page, no private data, one
+ * reference held, and BL_locked set. @bits is handed to
+ * fsblock_set_bits(), presumably to record log2 of the block size.
+ */
+ block->flags = 0;
+ block->block_nr = -1;
+ block->page = page;
+ block->private = NULL;
+ FSB_BUG_ON(atomic_read(&block->count)); /* allocators/ctor must hand us count == 0 */
+ atomic_inc(&block->count);
+ __set_bit(BL_locked, &block->flags); /* non-atomic variant: block is not yet attached to the page */
+ fsblock_set_bits(block, bits);
+#ifdef FSB_DEBUG
+ atomic_set(&block->vmap_count, 0);
+#endif
+}
+
+static void init_mblock(struct page *page, struct fsblock_meta *mblock, unsigned int bits)
+{ /*
+ * Initialise a metadata block: run init_block() on the embedded fsblock,
+ * tag it BL_metadata, and start with an empty assoc_list and no
+ * assoc_mapping.
+ */
+ init_block(page, &mblock->block, bits);
+ __set_bit(BL_metadata, &mblock->block.flags);
+ INIT_LIST_HEAD(&mblock->assoc_list);
+ mblock->assoc_mapping = NULL;
+}
+
+static struct fsblock *alloc_blocks(struct page *page, unsigned int bits, gfp_t gfp_flags)
+{ /*
+ * Allocate and initialise the data block(s) for @page on the page's NUMA
+ * node. @bits is log2 of the block size: at or above PAGE_CACHE_SHIFT a
+ * single block is taken from block_cache; below it an array of
+ * PAGE_CACHE_SIZE >> bits blocks is kmalloc'ed. Returns the (first)
+ * block, or NULL if the allocation failed.
+ */
+ struct fsblock *block;
+ int nid = page_to_nid(page);
+
+ if (bits >= PAGE_CACHE_SHIFT) { /* !subpage */
+ block = kmem_cache_alloc_node(block_cache, gfp_flags, nid);
+ if (likely(block))
+ init_block(page, block, bits);
+ } else {
+ int nr = PAGE_CACHE_SIZE >> bits;
+ /* XXX: could have a range of cache sizes */
+ block = kmalloc_node(sizeof(struct fsblock)*nr, gfp_flags, nid);
+ if (likely(block)) {
+ int i;
+ for (i = 0; i < nr; i++) {
+ struct fsblock *b = block + i;
+ atomic_set(&b->count, 0); /* kmalloc memory has no ctor; init_block() asserts count == 0 */
+ init_block(page, b, bits);
+ }
+ }
+ }
+ return block;
+}
+
+static struct fsblock_meta *alloc_mblocks(struct page *page, unsigned int bits, gfp_t gfp_flags)
+{ /*
+ * Metadata counterpart of alloc_blocks(): one fsblock_meta from
+ * mblock_cache when @bits >= PAGE_CACHE_SHIFT, otherwise a kmalloc'ed
+ * array of PAGE_CACHE_SIZE >> bits of them, each set up with
+ * init_mblock(). Returns the (first) mblock, or NULL on allocation
+ * failure.
+ */
+ struct fsblock_meta *mblock;
+ int nid = page_to_nid(page);
+
+ if (bits >= PAGE_CACHE_SHIFT) { /* !subpage */
+ mblock = kmem_cache_alloc_node(mblock_cache, gfp_flags, nid);
+ if (likely(mblock))
+ init_mblock(page, mblock, bits);
+ } else {
+ int nr = PAGE_CACHE_SIZE >> bits;
+ mblock = kmalloc_node(sizeof(struct fsblock_meta)*nr, gfp_flags, nid);
+ if (likely(mblock)) {
+ int i;
+ for (i = 0; i < nr; i++) {
+ struct fsblock_meta *mb = mblock + i;
+ atomic_set(&mb->block.count, 0); /* kmalloc memory has no ctor; init_block() asserts count == 0 */
+ init_mblock(page, mb, bits);
+ }
+ }
+ }
+ return mblock;
+}
+
+#ifdef FSB_DEBUG
+static void assert_block(struct fsblock *block)
+{ /*
+ * FSB_DEBUG-only consistency check between a block and its page(s): the
+ * page must be locked and flagged PageBlocks; for a superpage block the
+ * page must be the first of the range and every page in the range must
+ * point back at this block; for subpage blocks every block in the page's
+ * array must point at the page.
+ */
+ struct page *page = block->page;
+
+ FSB_BUG_ON(!PageLocked(page));
+ FSB_BUG_ON(!PageBlocks(page));
+
+ if (fsblock_superpage(block)) {
+ struct page *p;
+
+ FSB_BUG_ON(page->index != first_page_idx(page->index,
+ fsblock_size(block)));
+
+ for_each_page(page, fsblock_size(block), p) {
+ FSB_BUG_ON(!PageBlocks(p));
+ FSB_BUG_ON(page_blocks(p) != block);
+ } end_for_each_page;
+ } else if (fsblock_subpage(block)) {
+ struct fsblock *b;
+ block = page_blocks(block->page); /* restart from the page's first block */
+
+ for_each_block(block, b)
+ FSB_BUG_ON(b->page != page);
+ }
+}
+
+static void free_block_check(struct fsblock *block)
+{ /*
+ * FSB_DEBUG-only check run on a block about to be freed: dump its state
+ * and BUG() if it is still locked, dirty, new, under read or write I/O,
+ * in sync I/O, referenced, vmapped, or carrying private data.
+ */
+ unsigned long flags = block->flags;
+ unsigned long badflags =
+ (1 << BL_locked |
+ 1 << BL_dirty |
+ /* 1 << BL_error | */
+ 1 << BL_new |
+ 1 << BL_writeback |
+ 1 << BL_readin |
+ 1 << BL_sync_io);
+ unsigned long goodflags = 0; /* no flag is currently required to be set */
+ unsigned int size = fsblock_size(block);
+ unsigned int count = atomic_read(&block->count);
+ unsigned int vmap_count = atomic_read(&block->vmap_count);
+ void *private = block->private;
+
+ if ((flags & badflags) || ((flags & goodflags) != goodflags) || count || private || vmap_count) {
+ printk("block flags = %lx\n", flags);
+ printk("block size = %u\n", size);
+ printk("block count = %u\n", count);
+ printk("block private = %p\n", private);
+ printk("vmap count = %u\n", vmap_count);
+ BUG();
+ }
+}
+#else
+static inline void assert_block(struct fsblock *block) {} /* no-op when FSB_DEBUG is not defined */
+#endif
+
+static void rcu_free_block(struct rcu_head *head)
+{ /* RCU callback queued by free_block(): recover the fsblock that embeds @head and kfree() it. */
+ struct fsblock *block = container_of(head, struct fsblock, rcu_head);
+ kfree(block);
+}
+
+static void free_block(struct fsblock *block)
+{ /*
+ * Release the memory behind @block. Subpage blocks were kmalloc'ed as an
+ * array, so the whole array is freed through call_rcu() using @block's
+ * rcu_head; other blocks go straight back to the slab cache they came
+ * from (selected by BL_metadata).
+ * NOTE(review): the subpage path presumably requires @block to be the
+ * first element of the array, since rcu_free_block() passes it to
+ * kfree() -- confirm against callers.
+ */
+ if (fsblock_subpage(block)) {
+#ifdef FSB_DEBUG
+ unsigned int bits = fsblock_bits(block);
+ int i, nr = PAGE_CACHE_SIZE >> bits;
+
+ for (i = 0; i < nr; i++) {
+ struct fsblock *b;
+ if (test_bit(BL_metadata, &block->flags))
+ b = &(block_mblock(block) + i)->block; /* step by sizeof(struct fsblock_meta) */
+ else
+ b = block + i;
+ free_block_check(b);
+ }
+#endif
+
+ INIT_RCU_HEAD(&block->rcu_head);
+ call_rcu(&block->rcu_head, rcu_free_block);
+ } else {
+#ifdef VMAP_CACHE
+ if (test_bit(BL_vmapped, &block->flags)) { /* drop the cached vmap() set up by vmap_block() */
+ vunmap(block->vaddr);
+ block->vaddr = NULL;
+ clear_bit(BL_vmapped, &block->flags);
+ }
+#endif
+#ifdef FSB_DEBUG
+ free_block_check(block);
+#endif
+ if (test_bit(BL_metadata, &block->flags))
+ kmem_cache_free(mblock_cache, block);
+ else
+ kmem_cache_free(block_cache, block);
+ }
+}
+
+int block_get_unless_zero(struct fsblock *block)
+{ /* Take a reference only if the count is currently non-zero; returns the result of atomic_inc_not_zero() (non-zero when the reference was taken). */
+ return atomic_inc_not_zero(&block->count);
+}
+
+void block_get(struct fsblock *block)
+{ /* Take an additional reference on @block; the caller must already hold one (a zero count trips FSB_BUG_ON). */
+ FSB_BUG_ON(atomic_read(&block->count) == 0);
+ atomic_inc(&block->count);
+}
+EXPORT_SYMBOL(block_get);
+
+/*
+ * Like nobh mode: when non-zero, block_put() tries to strip the blocks off
+ * a page as soon as the last external reference is dropped.
+ * The section attribute has to precede the initializer.
+ */
+static int fsblock_noblock __read_mostly = 1;
+
+void block_put(struct fsblock *block)
+{ /*
+ * Drop one reference on @block. The count must be above 1 on entry. If
+ * the count falls to 1, the block is clean, and either the page has lost
+ * its mapping or fsblock_noblock is set, an opportunistic
+ * try_to_free_blocks() is attempted under a non-blocking page lock.
+ */
+ int free_it;
+ struct page *page;
+
+ page = block->page;
+ free_it = 0;
+ if (!page->mapping || fsblock_noblock) {
+ free_it = 1;
+ page_cache_get(page); /* page ref held across the possible free below; released at the end */
+ }
+
+#ifdef FSB_DEBUG
+ FSB_BUG_ON(atomic_read(&block->count) == 2 &&
+ atomic_read(&block->vmap_count));
+#endif
+ FSB_BUG_ON(atomic_read(&block->count) <= 1);
+
+ /* dec_return required for the release memory barrier */
+ if (atomic_dec_return(&block->count) == 1) {
+ if (free_it && !test_bit(BL_dirty, &block->flags)) {
+ /*
+ * At this point we'd like to try stripping the block
+ * if it is only existing in a self-referential
+ * relationship with the pagecache (ie. the pagecache
+ * is truncated as well).
+ */
+ if (!TestSetPageLocked(page)) { /* trylock: skip the strip if the page is already locked */
+ try_to_free_blocks(page);
+ unlock_page(page);
+ }
+ }
+ }
+ if (free_it)
+ page_cache_release(page);
+}
+EXPORT_SYMBOL(block_put);
+
+static int sleep_on_block(void *unused)
+{ /* Wait action handed to wait_on_bit()/wait_on_bit_lock() in this file: sleep via io_schedule() and return 0. */
+ io_schedule();
+ return 0;
+}
+
+void lock_block(struct fsblock *block)
+{ /* Acquire the BL_locked bit lock on @block, sleeping uninterruptibly if trylock_block() fails. May sleep. */
+ might_sleep();
+
+ if (!trylock_block(block))
+ wait_on_bit_lock(&block->flags, BL_locked, sleep_on_block,
+ TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(lock_block);
+
+void unlock_block(struct fsblock *block)
+{ /* Release BL_locked (which must be held) and wake anyone waiting on that bit. */
+ FSB_BUG_ON(!test_bit(BL_locked, &block->flags));
+ smp_mb__before_clear_bit(); /* barrier before the clear_bit() that releases the lock */
+ clear_bit(BL_locked, &block->flags);
+ smp_mb__after_clear_bit(); /* barrier between the clear and the wake-up */
+ wake_up_bit(&block->flags, BL_locked);
+}
+EXPORT_SYMBOL(unlock_block);
+
+void wait_on_block_locked(struct fsblock *block)
+{ /* Sleep uninterruptibly until BL_locked is clear, without taking the lock. May sleep. */
+ might_sleep();
+
+ if (test_bit(BL_locked, &block->flags))
+ wait_on_bit(&block->flags, BL_locked, sleep_on_block,
+ TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_on_block_locked);
+
+static void set_block_sync_io(struct fsblock *block)
+{ /*
+ * Flag @block as being under synchronous I/O (BL_sync_io). The block's
+ * page must be locked and the flag must not already be set; under
+ * FSB_DEBUG every page of the block is also checked to be locked and not
+ * under writeback. The end-I/O handlers read this flag to decide
+ * whether to unlock pages / end page writeback themselves.
+ */
+ FSB_BUG_ON(!PageLocked(block->page));
+ FSB_BUG_ON(test_bit(BL_sync_io, &block->flags));
+#ifdef FSB_DEBUG
+ if (fsblock_superpage(block)) {
+ struct page *page = block->page, *p;
+ for_each_page(page, fsblock_size(block), p) {
+ FSB_BUG_ON(!PageLocked(p));
+ FSB_BUG_ON(PageWriteback(p));
+ } end_for_each_page;
+ } else {
+ FSB_BUG_ON(!PageLocked(block->page));
+ FSB_BUG_ON(PageWriteback(block->page));
+ }
+#endif
+ set_bit(BL_sync_io, &block->flags);
+}
+
+static void end_block_sync_io(struct fsblock *block)
+{ /* Clear BL_sync_io (which must be set, with the page still locked) and wake waiters in wait_on_block_sync_io(). */
+ FSB_BUG_ON(!PageLocked(block->page));
+ FSB_BUG_ON(!test_bit(BL_sync_io, &block->flags));
+ clear_bit(BL_sync_io, &block->flags);
+ smp_mb__after_clear_bit(); /* barrier between the clear and the wake-up */
+ wake_up_bit(&block->flags, BL_sync_io);
+}
+
+static void wait_on_block_sync_io(struct fsblock *block)
+{ /* Sleep uninterruptibly until BL_sync_io is cleared on @block; the block's page must be locked. May sleep. */
+ might_sleep();
+
+ FSB_BUG_ON(!PageLocked(block->page));
+ if (test_bit(BL_sync_io, &block->flags))
+ wait_on_bit(&block->flags, BL_sync_io, sleep_on_block,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * Take the "I/O lock" of a block, i.e. the page lock of every page the
+ * block covers: each page of the range for a superpage block, otherwise
+ * just the block's own page. May sleep.
+ */
+static void iolock_block(struct fsblock *block)
+{
+	struct page *head, *p;
+
+	might_sleep();
+
+	head = block->page;
+	if (fsblock_superpage(block)) {
+		for_each_page(head, fsblock_size(block), p) {
+			lock_page(p);
+		} end_for_each_page;
+		return;
+	}
+
+	lock_page(head);
+}
+
+/*
+ * Release the page lock on every page covered by @block: the whole range
+ * for a superpage block, otherwise the single page.
+ */
+static void iounlock_block(struct fsblock *block)
+{
+	struct page *head = block->page;
+	struct page *p;
+
+	if (fsblock_superpage(block)) {
+		for_each_page(head, fsblock_size(block), p) {
+			unlock_page(p);
+		} end_for_each_page;
+		return;
+	}
+
+	unlock_page(head);
+}
+
+/*
+ * Wait, without taking any lock, until every page covered by @block has
+ * been unlocked. May sleep.
+ */
+static void wait_on_block_iolock(struct fsblock *block)
+{
+	struct page *head, *p;
+
+	might_sleep();
+
+	head = block->page;
+	if (fsblock_superpage(block)) {
+		for_each_page(head, fsblock_size(block), p) {
+			wait_on_page_locked(p);
+		} end_for_each_page;
+		return;
+	}
+
+	wait_on_page_locked(head);
+}
+
+/*
+ * Move the page(s) behind @block from "locked" to "under writeback": each
+ * page is flagged with set_page_writeback() and then unlocked.
+ */
+static void set_block_writeback(struct fsblock *block)
+{
+	struct page *head, *p;
+
+	might_sleep();
+
+	head = block->page;
+	if (fsblock_superpage(block)) {
+		for_each_page(head, fsblock_size(block), p) {
+			set_page_writeback(p);
+			unlock_page(p);
+		} end_for_each_page;
+		return;
+	}
+
+	set_page_writeback(head);
+	unlock_page(head);
+}
+
+/*
+ * End page writeback on every page covered by @block: the whole range for
+ * a superpage block, otherwise the single page.
+ */
+static void end_block_writeback(struct fsblock *block)
+{
+	struct page *head = block->page;
+	struct page *p;
+
+	if (fsblock_superpage(block)) {
+		for_each_page(head, fsblock_size(block), p) {
+			end_page_writeback(p);
+		} end_for_each_page;
+		return;
+	}
+
+	end_page_writeback(head);
+}
+
+/*
+ * Wait until writeback has finished on every page covered by @block.
+ * May sleep.
+ */
+static void wait_on_block_writeback(struct fsblock *block)
+{
+	struct page *head, *p;
+
+	might_sleep();
+
+	head = block->page;
+	if (fsblock_superpage(block)) {
+		for_each_page(head, fsblock_size(block), p) {
+			wait_on_page_writeback(p);
+		} end_for_each_page;
+		return;
+	}
+
+	wait_on_page_writeback(head);
+}
+
+/*
+ * Return the block device that I/O for @mapping should be sent to: the
+ * inode's own i_bdev when the host inode is a block device, otherwise the
+ * s_bdev of the host inode's superblock.
+ */
+static struct block_device *mapping_data_bdev(struct address_space *mapping)
+{
+	struct inode *host = mapping->host;
+
+	if (likely(!S_ISBLK(host->i_mode)))
+		return host->i_sb->s_bdev;
+
+	return host->i_bdev;
+}
+
+static struct fsblock *find_get_page_block(struct page *page)
+{ /*
+ * Look up the block attached to @page inside an RCU read-side section and
+ * take a reference on it. Returns the block with its count raised, or
+ * NULL when the page has no block or the page's block pointer changed
+ * after the reference was taken.
+ */
+ struct fsblock *block;
+
+ rcu_read_lock();
+again:
+ block = page_blocks_rcu(page);
+ if (block) {
+ /*
+ * Might be better off implementing this as a bit spinlock
+ * rather than count (which requires tricks with ordering
+ * eg. release vs set page dirty).
+ */
+ if (block_get_unless_zero(block)) {
+ if ((page_blocks_rcu(page) != block)) { /* pointer changed under us: give the reference back */
+ block_put(block);
+ block = NULL;
+ }
+ } else { /* count was zero: spin and re-read the pointer */
+ cpu_relax();
+ goto again;
+ }
+ }
+ rcu_read_unlock();
+
+ return block;
+}
+
+static int __set_page_dirty_noblocks(struct page *page)
+{ /*
+ * Dirty @page in the pagecache without touching block state: a checked
+ * wrapper around __set_page_dirty_nobuffers(), whose result is returned.
+ * The page must have blocks and, unless they are subpage blocks, be
+ * uptodate.
+ */
+ FSB_BUG_ON(!PageBlocks(page));
+ FSB_BUG_ON(!fsblock_subpage(page_blocks(page)) && !PageUptodate(page));
+
+ return __set_page_dirty_nobuffers(page);
+}
+
+/*
+ * fsblock_set_page_dirty - mark every block of @page dirty, then the page.
+ *
+ * @page must be uptodate and have blocks attached, and every block must
+ * be uptodate. Sets BL_dirty on each block that does not have it yet and
+ * then dirties the page itself.
+ *
+ * Returns the result of __set_page_dirty_nobuffers() for the page. The
+ * return value says nothing about whether any block was newly dirtied.
+ */
+int fsblock_set_page_dirty(struct page *page)
+{
+	struct fsblock *block;
+
+	FSB_BUG_ON(!PageUptodate(page));
+	FSB_BUG_ON(!PageBlocks(page));
+//	FSB_BUG_ON(!PageLocked(page)); /* XXX: this can go away when we pin a page's metadata */
+
+	block = page_blocks(page);
+	if (fsblock_subpage(block)) {
+		struct fsblock *b;
+
+		for_each_block(block, b) {
+			FSB_BUG_ON(!test_bit(BL_uptodate, &b->flags));
+			if (!test_bit(BL_dirty, &b->flags))
+				set_bit(BL_dirty, &b->flags);
+		}
+	} else {
+		FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+		if (!test_bit(BL_dirty, &block->flags))
+			set_bit(BL_dirty, &block->flags);
+	}
+	/*
+	 * XXX: this is slightly racy because the above blocks could be
+	 * cleaned in a writeback that's underway, while the page will
+	 * still get marked dirty below. This technically breaks some
+	 * invariants that we check for (that a dirty page must have at
+	 * least 1 dirty buffer). Eventually we could just relax those
+	 * invariants, but keep them in for now to catch bugs.
+	 */
+	return __set_page_dirty_noblocks(page);
+}
+EXPORT_SYMBOL(fsblock_set_page_dirty);
+
+/*
+ * Do we need a fast atomic version for just page sized / aligned maps?
+ */
+void *vmap_block(struct fsblock *block, off_t off, size_t len)
+{ /*
+ * Map @len bytes starting @off bytes into @block and return a kernel
+ * virtual address for the first byte. @off + @len must not exceed the
+ * block size. Blocks of at most one page are mapped with kmap(). A
+ * superpage block is mapped either with kmap() of the single page the
+ * range falls in (only without VMAP_CACHE) or with vmap() over several
+ * pages; with VMAP_CACHE the whole block is vmap()ed once and the
+ * address is cached in block->vaddr. The vmap() path returns
+ * ERR_PTR(-ENOMEM) on allocation or mapping failure. Each successful
+ * call is undone with vunmap_block() using the same @off/@len.
+ */
+ struct address_space *mapping = block->page->mapping;
+ unsigned int size = fsblock_size(block);
+
+ FSB_BUG_ON(off < 0);
+ FSB_BUG_ON(off + len > size);
+
+ if (!fsblock_superpage(block)) {
+ unsigned int page_offset = 0;
+ if (fsblock_subpage(block))
+ page_offset = block_page_offset(block, size);
+#ifdef FSB_DEBUG
+ atomic_inc(&block->vmap_count);
+#endif
+ return kmap(block->page) + page_offset + off;
+ } else {
+ pgoff_t pgoff, start, end;
+ unsigned long pos;
+
+#ifdef VMAP_CACHE
+ if (test_bit(BL_vmapped, &block->flags)) {
+ while (test_bit(BL_vmap_lock, &block->flags)) /* wait out a concurrent installer of vaddr */
+ cpu_relax();
+ smp_rmb();
+#ifdef FSB_DEBUG
+ atomic_inc(&block->vmap_count);
+#endif
+ return block->vaddr + off;
+ }
+#endif
+
+ pgoff = block->page->index;
+ FSB_BUG_ON(test_bit(BL_metadata, &block->flags) &&
+ pgoff != block->block_nr * (size >> PAGE_CACHE_SHIFT));
+ start = pgoff + (off >> PAGE_CACHE_SHIFT); /* first page index touched by the range */
+ end = pgoff + ((off + len - 1) >> PAGE_CACHE_SHIFT); /* last page index touched by the range */
+ pos = off & ~PAGE_CACHE_MASK; /* byte offset within the first page */
+
+#ifndef VMAP_CACHE
+ if (start == end) {
+ struct page *page;
+
+ page = find_page(mapping, start);
+ FSB_BUG_ON(!page);
+
+#ifdef FSB_DEBUG
+ atomic_inc(&block->vmap_count);
+#endif
+ return kmap(page) + pos;
+ } else
+#endif
+ {
+ int nr;
+ struct page **pages;
+ void *addr;
+#ifndef VMAP_CACHE
+ nr = end - start + 1;
+#else
+ nr = size >> PAGE_CACHE_SHIFT;
+#endif
+ pages = kmalloc(nr * sizeof(struct page *), GFP_NOFS);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+#ifndef VMAP_CACHE
+ find_pages(mapping, start, nr, pages);
+#else
+ find_pages(mapping, pgoff, nr, pages);
+#endif
+
+ addr = vmap(pages, nr, VM_MAP, PAGE_KERNEL);
+ kfree(pages); /* the page array is only needed while building the mapping */
+ if (!addr)
+ return ERR_PTR(-ENOMEM);
+
+#ifdef FSB_DEBUG
+ atomic_inc(&block->vmap_count);
+#endif
+#ifndef VMAP_CACHE
+ return addr + pos;
+#else
+ bit_spin_lock(BL_vmap_lock, &block->flags);
+ if (!test_bit(BL_vmapped, &block->flags)) {
+ block->vaddr = addr;
+ set_bit(BL_vmapped, &block->flags);
+ }
+ bit_spin_unlock(BL_vmap_lock, &block->flags);
+ if (block->vaddr != addr) /* lost the race: someone else installed a mapping first */
+ vunmap(addr);
+ return block->vaddr + off;
+#endif
+ }
+ }
+}
+EXPORT_SYMBOL(vmap_block);
+
+/*
+ * vunmap_block - undo a mapping set up by vmap_block().
+ * @block: the block that was mapped
+ * @off:   byte offset passed to vmap_block()
+ * @len:   length passed to vmap_block()
+ * @vaddr: address returned by vmap_block()
+ *
+ * Blocks of at most one page are released with kunmap(). For superpage
+ * blocks without VMAP_CACHE, the same start/end page computation as in
+ * vmap_block() decides between kunmap() of the single page and vunmap()
+ * of the multi-page mapping. With VMAP_CACHE the cached mapping is kept
+ * and nothing is done for superpage blocks here.
+ */
+void vunmap_block(struct fsblock *block, off_t off, size_t len, void *vaddr)
+{
+#ifdef FSB_DEBUG
+	FSB_BUG_ON(atomic_read(&block->vmap_count) <= 0);
+	atomic_dec(&block->vmap_count);
+#endif
+	if (!fsblock_superpage(block))
+		kunmap(block->page);
+#ifndef VMAP_CACHE
+	else {
+		unsigned int size = fsblock_size(block);
+		pgoff_t pgoff, start, end;
+
+		/*
+		 * Derive the page offset exactly as vmap_block() does: from
+		 * the first page's index. block_nr only equals the pagecache
+		 * position for metadata blocks, so it is checked for those
+		 * alone and not used to locate the page.
+		 */
+		pgoff = block->page->index;
+		FSB_BUG_ON(test_bit(BL_metadata, &block->flags) &&
+			pgoff != block->block_nr * (size >> PAGE_CACHE_SHIFT));
+		start = pgoff + (off >> PAGE_CACHE_SHIFT);
+		end = pgoff + ((off + len - 1) >> PAGE_CACHE_SHIFT);
+
+		if (start == end) {
+			struct address_space *mapping = block->page->mapping;
+			struct page *page;
+
+			page = find_page(mapping, start);
+			FSB_BUG_ON(!page);
+
+			kunmap(page);
+		} else {
+			unsigned long pos;
+
+			/* vmap_block() returned addr + pos; recover addr. */
+			pos = off & ~PAGE_CACHE_MASK;
+			vunmap(vaddr - pos);
+		}
+	}
+#endif
+}
+EXPORT_SYMBOL(vunmap_block);
+
+static struct fsblock *__find_get_block(struct address_space *mapping, sector_t blocknr)
+{ /*
+ * Find the block numbered @blocknr in @mapping's pagecache and return it
+ * with a reference held, or NULL if the page is not cached or has no
+ * block attached. For subpage blocks the page's array is searched for
+ * the entry whose block_nr matches and the reference is moved to it.
+ * The page reference taken for the lookup is dropped before returning.
+ */
+ struct inode *inode = mapping->host;
+ struct page *page;
+ pgoff_t pgoff;
+
+ pgoff = sector_pgoff(blocknr, inode->i_blkbits);
+
+ page = find_get_page(mapping, pgoff);
+ if (page) {
+ struct fsblock *block;
+ block = find_get_page_block(page);
+ if (block) {
+ if (fsblock_subpage(block)) {
+ struct fsblock *b;
+ for_each_block(block, b) {
+ if (b->block_nr == blocknr) {
+ block_get(b); /* move our reference from the first block to the match */
+ block_put(block);
+ block = b;
+ goto found;
+ }
+ }
+ FSB_BUG(); /* no sub-block carries the requested number */
+ } else
+ FSB_BUG_ON(block->block_nr != blocknr);
+found:
+ FSB_BUG_ON(!test_bit(BL_mapped, &block->flags));
+ }
+
+ page_cache_release(page);
+ return block;
+ }
+ return NULL;
+}
+
+struct fsblock_meta *find_get_mblock(struct block_device *bdev, sector_t blocknr, unsigned int size)
+{ /*
+ * Look up an already-cached metadata block of @bdev by block number.
+ * Returns it with a reference held only if the cached block is flagged
+ * BL_metadata and has exactly @size bytes; otherwise any reference taken
+ * is dropped and NULL is returned.
+ */
+ struct fsblock *block;
+
+ block = __find_get_block(bdev->bd_inode->i_mapping, blocknr);
+ if (block) {
+ if (test_bit(BL_metadata, &block->flags)) {
+ /*
+ * XXX: need a better way than 'size' to tag and
+ * identify metadata fsblocks?
+ */
+ if (fsblock_size(block) == size)
+ return block_mblock(block);
+ }
+
+ block_put(block);
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(find_get_mblock);
+
+static void attach_block_page(struct page *page, struct fsblock *block)
+{ /* Finish attaching a new block: inherit uptodate state from @page, then drop the BL_locked bit that init_block() set. */
+ if (PageUptodate(page))
+ set_bit(BL_uptodate, &block->flags);
+ unlock_block(block); /* XXX: need this? */
+}
+
+/* This goes away when we get rid of buffer.c */
+static int invalidate_aliasing_buffers(struct page *page, unsigned int size)
+{ /*
+ * Strip buffer.c state (PagePrivate) from the page(s) a block of @size
+ * bytes starting at @page would cover, via try_to_free_buffers().
+ * Returns 1 if nothing remains in the way, 0 if a page's buffers could
+ * not be freed (for a single page this is try_to_free_buffers()'s own
+ * result). In the superpage case every page must be locked and must not
+ * have fsblocks attached.
+ */
+ if (!size_is_superpage(size)) {
+ if (PagePrivate(page))
+ return try_to_free_buffers(page);
+ } else {
+ struct page *p;
+
+ for_each_page(page, size, p) {
+ FSB_BUG_ON(!PageLocked(p));
+ FSB_BUG_ON(PageBlocks(p));
+
+ if (PagePrivate(p)) {
+ if (!try_to_free_buffers(p))
+ return 0;
+ }
+ } end_for_each_page;
+ }
+ return 1;
+}
+
+static int __try_to_free_blocks(struct page *page, int all_locked);
+/*
+ * Strip existing fsblocks from the page(s) that a new block of @size bytes
+ * starting at @page would cover, using __try_to_free_blocks(p, 1).
+ *
+ * Returns 1 if no aliasing blocks remain, 0 if some page's blocks could
+ * not be freed (for a single page this is __try_to_free_blocks()'s own
+ * result). In the superpage case every page of the range must be locked.
+ *
+ * Pages with blocks attached are the case this function exists to handle,
+ * so PageBlocks() must not be asserted clear in the superpage loop.
+ */
+static int invalidate_aliasing_blocks(struct page *page, unsigned int size)
+{
+	if (!size_is_superpage(size)) {
+		if (PageBlocks(page)) {
+			/* could check for compatible blocks here, but meh */
+			return __try_to_free_blocks(page, 1);
+		}
+	} else {
+		struct page *p;
+
+		for_each_page(page, size, p) {
+			FSB_BUG_ON(!PageLocked(p));
+
+			if (PageBlocks(p)) {
+				if (!__try_to_free_blocks(p, 1))
+					return 0;
+			}
+		} end_for_each_page;
+	}
+	return 1;
+}
+
+#define CREATE_METADATA 0x01
+#define CREATE_DIRTY 0x02
+static int create_unmapped_blocks(struct page *page, gfp_t gfp_flags, unsigned int size, unsigned int flags)
+{ /*
+ * Allocate block(s) of @size bytes for @page and attach them, leaving
+ * them unmapped (block_nr == -1) and unlocked. @page (and, for a
+ * superpage block, every page of the range) must be locked, clean, not
+ * under writeback and without blocks. @flags may contain
+ * CREATE_METADATA to allocate fsblock_meta objects; CREATE_DIRTY is
+ * rejected by an assertion. Returns 0 on success, -EBUSY if aliasing
+ * buffer_heads could not be freed, -ENOMEM if allocation failed.
+ */
+ unsigned int bits = ffs(size) - 1; /* log2(size); size is assumed to be a power of two */
+ struct fsblock *block;
+
+ FSB_BUG_ON(!PageLocked(page));
+ FSB_BUG_ON(PageDirty(page));
+ FSB_BUG_ON(PageWriteback(page));
+ FSB_BUG_ON(PageBlocks(page));
+ FSB_BUG_ON(flags & CREATE_DIRTY);
+
+ if (!invalidate_aliasing_buffers(page, size))
+ return -EBUSY;
+
+ /*
+ * XXX: maybe use private alloc functions so fses can embed block into
+ * their fs-private block rather than using ->private? Maybe ->private
+ * is easier though...
+ */
+ if (!(flags & CREATE_METADATA)) {
+ block = alloc_blocks(page, bits, gfp_flags);
+ if (!block)
+ return -ENOMEM;
+ } else {
+ struct fsblock_meta *mblock;
+ mblock = alloc_mblocks(page, bits, gfp_flags);
+ if (!mblock)
+ return -ENOMEM;
+ block = mblock_block(mblock);
+ }
+
+ if (!fsblock_superpage(block)) {
+ attach_page_blocks(page, block);
+ /*
+ * Ensure ordering between setting page->block ptr and reading
+ * PageDirty, thus giving synchronisation between this and
+ * fsblock_set_page_dirty()
+ */
+ smp_mb();
+ if (fsblock_subpage(block)) {
+ struct fsblock *b;
+ for_each_block(block, b)
+ attach_block_page(page, b);
+ } else
+ attach_block_page(page, block);
+ } else {
+ struct page *p;
+ int uptodate = 1;
+ FSB_BUG_ON(page->index != first_page_idx(page->index, size));
+
+ for_each_page(page, size, p) {
+ FSB_BUG_ON(!PageLocked(p));
+ FSB_BUG_ON(PageDirty(p));
+ FSB_BUG_ON(PageWriteback(p));
+ FSB_BUG_ON(PageBlocks(p));
+ attach_page_blocks(p, block);
+ } end_for_each_page;
+ smp_mb();
+ for_each_page(page, size, p) { /* block is uptodate only if every page of the range is */
+ if (!PageUptodate(p))
+ uptodate = 0;
+ } end_for_each_page;
+ if (uptodate)
+ set_bit(BL_uptodate, &block->flags);
+ unlock_block(block);
+ }
+
+ assert_block(block);
+
+ return 0;
+}
+
+static struct page *create_lock_page_range(struct address_space *mapping,
+ pgoff_t pgoff, unsigned int size)
+{ /*
+ * Find or create, via find_or_create_page(), the pagecache page(s)
+ * needed for a block of @size bytes at page offset @pgoff: one page, or
+ * size >> PAGE_CACHE_SHIFT consecutive pages for a superpage size (in
+ * which case @pgoff must be the first index of the range). The
+ * reference obtained for each page is dropped again straight away; the
+ * pages are left locked for the caller, who releases them with
+ * unlock_page_range(). Returns the first page, or NULL on failure after
+ * unlocking any pages already obtained. __GFP_FS is masked out of the
+ * mapping's gfp mask.
+ */
+ struct page *page;
+ gfp_t gfp;
+
+ gfp = mapping_gfp_mask(mapping) & ~__GFP_FS;
+ page = find_or_create_page(mapping, pgoff, gfp);
+ if (!page)
+ return NULL;
+
+ FSB_BUG_ON(!page->mapping);
+ page_cache_release(page);
+
+ if (size_is_superpage(size)) {
+ int i, nr = size >> PAGE_CACHE_SHIFT;
+
+ FSB_BUG_ON(pgoff != first_page_idx(pgoff, size));
+
+ for (i = 1; i < nr; i++) {
+ struct page *p;
+
+ p = find_or_create_page(mapping, pgoff + i, gfp);
+ if (!p) {
+ nr = i; /* unwind: unlock the i pages obtained so far */
+ for (i = 0; i < nr; i++) {
+ p = find_page(mapping, pgoff + i);
+ FSB_BUG_ON(!p);
+ unlock_page(p);
+ }
+ return NULL;
+ }
+ FSB_BUG_ON(!p->mapping);
+ page_cache_release(p);
+ /*
+ * don't want a ref hanging around (see end io handlers
+ * for pagecache). Page lock pins the pcache ref.
+ * XXX: this is a little unclean.
+ */
+ }
+ }
+ FSB_BUG_ON(page->index != pgoff);
+ return page;
+}
+
+/*
+ * Unlock the page(s) spanned by a block of @size bytes whose first page is
+ * @page. For a superpage size, @page must be the first page of the range
+ * and every page in the range is unlocked; otherwise only @page is.
+ */
+static void unlock_page_range(struct page *page, unsigned int size)
+{
+	struct page *p;
+
+	if (!size_is_superpage(size)) {
+		unlock_page(page);
+		return;
+	}
+
+	FSB_BUG_ON(page->index != first_page_idx(page->index, size));
+	for_each_page(page, size, p) {
+		FSB_BUG_ON(!p);
+		unlock_page(p);
+	} end_for_each_page;
+}
+
+/*
+ * find_or_create_mblock - get the metadata block @blocknr of @bdev.
+ * @bdev:    block device whose bd_inode pagecache holds the metadata
+ * @blocknr: block number, in units of bd_inode->i_blkbits
+ * @size:    block size in bytes
+ *
+ * Returns a cached block if find_get_mblock() finds one. Otherwise the
+ * backing page range is created and locked, aliasing fsblocks are
+ * stripped, new metadata blocks are attached and mapped 1:1 onto the
+ * device (for subpage sizes every block on the page gets its number),
+ * and the page range is unlocked again.
+ *
+ * On success the returned block has a reference held for the caller. On
+ * failure an ERR_PTR() is returned: -ENOMEM if the pages or blocks could
+ * not be allocated, -EBUSY if aliasing blocks or buffers could not be
+ * freed.
+ */
+struct fsblock_meta *find_or_create_mblock(struct block_device *bdev, sector_t blocknr, unsigned int size)
+{
+	struct inode *bd_inode = bdev->bd_inode;
+	struct address_space *bd_mapping = bd_inode->i_mapping;
+	struct page *page;
+	struct fsblock_meta *mblock;
+	pgoff_t pgoff;
+	int ret;
+
+	pgoff = sector_pgoff(blocknr, bd_inode->i_blkbits);
+
+	mblock = find_get_mblock(bdev, blocknr, size);
+	if (mblock)
+		return mblock;
+
+	page = create_lock_page_range(bd_mapping, pgoff, size);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	if (!invalidate_aliasing_blocks(page, size)) {
+		mblock = ERR_PTR(-EBUSY);
+		goto failed;
+	}
+	ret = create_unmapped_blocks(page, GFP_NOFS, size, CREATE_METADATA);
+	if (ret) {
+		mblock = ERR_PTR(ret);
+		goto failed;
+	}
+
+	mblock = page_mblocks(page);
+	/*
+	 * XXX: technically this is just the block_dev.c direct
+	 * mapping. So maybe logically in that file? (OTOH it *is*
+	 * "metadata")
+	 */
+	if (fsblock_subpage(&mblock->block)) {
+		/* Named 'found' so it does not shadow the int 'ret' above. */
+		struct fsblock_meta *found = NULL, *mb;
+		sector_t base_block;
+
+		/* Number of the first block that lives on this page. */
+		base_block = pgoff << (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
+		__for_each_mblock(mblock, size, mb) {
+			mb->block.block_nr = base_block;
+			set_bit(BL_mapped, &mb->block.flags);
+			if (mb->block.block_nr == blocknr) {
+				FSB_BUG_ON(found);
+				found = mb;
+			}
+			base_block++;
+		}
+		FSB_BUG_ON(!found);
+		mblock = found;
+	} else {
+		mblock->block.block_nr = blocknr;
+		set_bit(BL_mapped, &mblock->block.flags);
+	}
+	mblock_get(mblock);
+failed:
+	/* Reached on success as well: the page range is always unlocked. */
+	unlock_page_range(page, size);
+	return mblock;
+}
+EXPORT_SYMBOL(find_or_create_mblock);
+
+static void block_end_read(struct fsblock *block, int uptodate)
+{ /*
+ * Read-completion handler for one block, called from block_end_bio_io().
+ * Records the outcome on the block (BL_uptodate, or BL_error plus
+ * PageError on its pages), clears BL_readin, and then marks the page(s)
+ * uptodate and unlocks them as appropriate. For subpage blocks the page
+ * is unlocked only when no block on it still has BL_readin set. For
+ * synchronous I/O (BL_sync_io) the pages stay locked and the sync waiter
+ * is woken instead. Finally drops the reference submit_block() took.
+ */
+ int sync_io;
+ int finished_readin = 1;
+ struct page *page = block->page;
+
+ FSB_BUG_ON(test_bit(BL_uptodate, &block->flags));
+ FSB_BUG_ON(test_bit(BL_error, &block->flags));
+
+ sync_io = test_bit(BL_sync_io, &block->flags); /* sampled before the flag is cleared further down */
+
+ if (unlikely(!uptodate)) {
+ set_bit(BL_error, &block->flags);
+ if (!fsblock_superpage(block))
+ SetPageError(page);
+ else {
+ struct page *p;
+ for_each_page(page, fsblock_size(block), p) {
+ SetPageError(p);
+ } end_for_each_page;
+ }
+ } else
+ set_bit(BL_uptodate, &block->flags);
+
+ if (fsblock_subpage(block)) {
+ unsigned long flags;
+ struct fsblock *b, *first = page_blocks(block->page);
+
+ local_irq_save(flags);
+ bit_spin_lock(BL_rd_lock, &first->flags); /* serialises read completions of blocks sharing the page */
+ clear_bit(BL_readin, &block->flags);
+ for_each_block(page_blocks(page), b) {
+ if (test_bit(BL_readin, &b->flags)) {
+ finished_readin = 0;
+ break;
+ }
+ if (!test_bit(BL_uptodate, &b->flags))
+ uptodate = 0; /* the page is uptodate only if every block on it is */
+ }
+ bit_spin_unlock(BL_rd_lock, &first->flags);
+ local_irq_restore(flags);
+ } else
+ clear_bit(BL_readin, &block->flags);
+
+ if (sync_io)
+ finished_readin = 0; /* don't unlock */
+ if (!fsblock_superpage(block)) {
+ FSB_BUG_ON(PageWriteback(page));
+ if (uptodate)
+ SetPageUptodate(page);
+ if (finished_readin)
+ unlock_page(page);
+ /*
+ * XXX: don't know whether or not to keep the page
+ * refcount elevated or simply rely on the page lock...
+ */
+ } else {
+ struct page *p;
+
+ for_each_page(page, fsblock_size(block), p) {
+ FSB_BUG_ON(PageDirty(p));
+ FSB_BUG_ON(PageWriteback(p));
+ if (uptodate)
+ SetPageUptodate(p);
+ if (finished_readin)
+ unlock_page(p);
+ } end_for_each_page;
+ }
+
+ if (sync_io)
+ end_block_sync_io(block);
+
+ block_put(block);
+}
+
+static void block_end_write(struct fsblock *block, int uptodate)
+{ /*
+ * Write-completion handler for one block, called from
+ * block_end_bio_io(). On failure sets BL_error, PageError on the
+ * block's page(s) and AS_EIO on the mapping. Clears BL_writeback and,
+ * unless this was synchronous I/O, ends page writeback; for subpage
+ * blocks that happens only once no block on the page is still under
+ * writeback. Synchronous I/O wakes the sync waiter instead. Finally
+ * drops the reference submit_block() took.
+ */
+ int sync_io;
+ int finished_writeback = 1;
+ struct page *page = block->page;
+
+ FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+ FSB_BUG_ON(test_bit(BL_error, &block->flags));
+
+ sync_io = test_bit(BL_sync_io, &block->flags); /* sampled before the flag is cleared further down */
+
+ if (unlikely(!uptodate)) {
+ set_bit(BL_error, &block->flags);
+ if (!fsblock_superpage(block))
+ SetPageError(page);
+ else {
+ struct page *p;
+ for_each_page(page, fsblock_size(block), p) {
+ SetPageError(p);
+ } end_for_each_page;
+ }
+ set_bit(AS_EIO, &page->mapping->flags);
+ }
+
+ if (fsblock_subpage(block)) {
+ unsigned long flags;
+ struct fsblock *b, *first = page_blocks(block->page);
+
+ local_irq_save(flags);
+ bit_spin_lock(BL_wb_lock, &first->flags); /* serialises write completions of blocks sharing the page */
+ clear_bit(BL_writeback, &block->flags);
+ for_each_block(first, b) {
+ if (test_bit(BL_writeback, &b->flags)) {
+ finished_writeback = 0;
+ break;
+ }
+ }
+ bit_spin_unlock(BL_wb_lock, &first->flags);
+ local_irq_restore(flags);
+ } else
+ clear_bit(BL_writeback, &block->flags);
+
+ if (!sync_io) {
+ if (finished_writeback) {
+ if (!fsblock_superpage(block)) {
+ end_page_writeback(page);
+ } else {
+ struct page *p;
+ for_each_page(page, fsblock_size(block), p) {
+ FSB_BUG_ON(!p->mapping);
+ end_page_writeback(p);
+ } end_for_each_page;
+ }
+ }
+ } else
+ end_block_sync_io(block);
+
+ block_put(block);
+}
+
+int fsblock_strip = 1; /* NOTE(review): global tunable, not referenced in the visible part of this file; meaning to be confirmed at its users */
+
+static int block_end_bio_io(struct bio *bio, unsigned int bytes_done, int err)
+{ /*
+ * bi_end_io callback installed by submit_block(). Returns 1 without
+ * doing anything while the bio still has bytes outstanding (bi_size !=
+ * 0); once it is complete, dispatches to block_end_read() or
+ * block_end_write() according to which of BL_readin / BL_writeback is
+ * set on the block in bi_private, drops a bio reference and returns 0.
+ */
+ struct fsblock *block = bio->bi_private;
+ int uptodate;
+
+ if (bio->bi_size)
+ return 1;
+
+ uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+
+ if (err == -EOPNOTSUPP) {
+ printk(KERN_WARNING "block_end_bio_io: op not supported!\n");
+ WARN_ON(uptodate);
+ }
+
+ FSB_BUG_ON(!(test_bit(BL_readin, &block->flags) ^
+ test_bit(BL_writeback, &block->flags))); /* exactly one I/O direction must be in flight */
+
+ if (test_bit(BL_readin, &block->flags))
+ block_end_read(block, uptodate);
+ else
+ block_end_write(block, uptodate);
+
+ bio_put(bio);
+
+ return 0;
+}
+
+static int submit_block(struct fsblock *block, int rw)
+{ /*
+ * Build and submit one bio covering the whole of @block (@rw is passed
+ * through to submit_bio()). The block must be mapped and its page must
+ * be locked or under writeback. Clears BL_error first, and takes a
+ * block reference that the completion handlers drop with block_put().
+ * Returns 0, or -EOPNOTSUPP if the bio is flagged BIO_EOPNOTSUPP after
+ * submission.
+ */
+ struct page *page = block->page;
+ struct address_space *mapping = page->mapping;
+ struct bio *bio;
+ int ret = 0;
+ unsigned int bits = fsblock_bits(block);
+ unsigned int size = 1 << bits;
+ int nr = (size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; /* pages spanned by the block, rounded up */
+
+#if 0
+ printk("submit_block for %s [blocknr=%lu, sector=%lu, size=%u]\n",
+ (test_bit(BL_readin, &block->flags) ? "read" : "write"),
+ (unsigned long)block->block_nr,
+ (unsigned long)block->block_nr * (size >> SECTOR_SHIFT), size);
+#endif
+
+ FSB_BUG_ON(!PageLocked(page) && !PageWriteback(page));
+ FSB_BUG_ON(!mapping);
+ FSB_BUG_ON(!test_bit(BL_mapped, &block->flags));
+
+ clear_bit(BL_error, &block->flags);
+
+ bio = bio_alloc(GFP_NOIO, nr);
+ bio->bi_sector = block->block_nr << (bits - SECTOR_SHIFT); /* block number scaled to SECTOR_SHIFT-sized sectors */
+ bio->bi_bdev = mapping_data_bdev(mapping);
+ bio->bi_end_io = block_end_bio_io;
+ bio->bi_private = block;
+
+ if (!fsblock_superpage(block)) {
+ unsigned int offset = 0;
+
+ if (fsblock_subpage(block))
+ offset = block_page_offset(block, size);
+ if (bio_add_page(bio, page, size, offset) != size)
+ FSB_BUG();
+ } else {
+ struct page *p;
+ int i;
+
+ i = 0;
+ for_each_page(page, size, p) {
+ if (bio_add_page(bio, p, PAGE_CACHE_SIZE, 0) != PAGE_CACHE_SIZE)
+ FSB_BUG();
+ i++;
+ } end_for_each_page;
+ FSB_BUG_ON(i != nr);
+ }
+
+ block_get(block); /* reference owned by the in-flight I/O */
+ bio_get(bio); /* extra bio reference so its flags can be read after submit_bio() */
+ submit_bio(rw, bio);
+
+ if (bio_flagged(bio, BIO_EOPNOTSUPP))
+ ret = -EOPNOTSUPP;
+
+ bio_put(bio);
+ return ret;
+}
+
+static int read_block(struct fsblock *block)
+{ /*
+ * Start reading @block from disk: set BL_readin and submit a READ.
+ * The block must be clean with no I/O in flight and its page must not
+ * be under writeback. Returns submit_block()'s result.
+ */
+ FSB_BUG_ON(PageWriteback(block->page));
+ FSB_BUG_ON(test_bit(BL_readin, &block->flags));
+ FSB_BUG_ON(test_bit(BL_writeback, &block->flags));
+ FSB_BUG_ON(test_bit(BL_dirty, &block->flags));
+ set_bit(BL_readin, &block->flags);
+ return submit_block(block, READ);
+}
+
+static int write_block(struct fsblock *block)
+{ /*
+ * Start writing @block to disk: set BL_writeback and submit a WRITE.
+ * The block must be uptodate with no I/O in flight and its page must
+ * already be under writeback. Returns submit_block()'s result.
+ */
+ FSB_BUG_ON(!PageWriteback(block->page));
+ FSB_BUG_ON(test_bit(BL_readin, &block->flags));
+ FSB_BUG_ON(test_bit(BL_writeback, &block->flags));
+ FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+ set_bit(BL_writeback, &block->flags);
+ return submit_block(block, WRITE);
+}
+
+int sync_block(struct fsblock *block)
+{ /*
+ * Write @block out and wait for the write to finish, if it is dirty.
+ * Takes the page lock(s), waits for any writeback already under way,
+ * re-checks BL_dirty, clears it, clears the page dirty state (for
+ * subpage blocks only when no other block on the page is still dirty),
+ * then submits the write and waits for it. Returns 0 if the block was
+ * clean or written successfully, write_block()'s error if submission
+ * failed, or -EIO if BL_error is set once writeback ends.
+ */
+ int ret = 0;
+
+ if (test_bit(BL_dirty, &block->flags)) {
+ struct page *page = block->page;
+
+ iolock_block(block);
+ wait_on_block_writeback(block);
+ FSB_BUG_ON(PageWriteback(page)); /* because block is locked */
+ if (test_bit(BL_dirty, &block->flags)) { /* re-check now that the page lock is held */
+ FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+ clear_bit(BL_dirty, &block->flags);
+
+ if (fsblock_subpage(block)) {
+ struct fsblock *b;
+ for_each_block(page_blocks(page), b) {
+ if (test_bit(BL_dirty, &b->flags))
+ goto page_dirty; /* a sibling is still dirty: leave the page dirty */
+ }
+ }
+ if (!fsblock_superpage(block)) {
+ ret = clear_page_dirty_for_io(page);
+ FSB_BUG_ON(!ret);
+ } else {
+ struct page *p;
+ for_each_page(page, fsblock_size(block), p) {
+ clear_page_dirty_for_io(p);
+ } end_for_each_page;
+ }
+page_dirty:
+ set_block_writeback(block); /* sets page writeback and drops the page lock(s) */
+
+ ret = write_block(block);
+ if (!ret) {
+ wait_on_block_writeback(block);
+ if (test_bit(BL_error, &block->flags))
+ ret = -EIO;
+ }
+ } else
+ iounlock_block(block);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(sync_block);
+
+/*
+ * Flag a metadata block as uptodate.
+ *
+ * For a superpage block every page it spans is marked uptodate; for a
+ * midpage block its single page is.  For any other block only the block
+ * flag is set and the page flag is left untouched.
+ */
+void mark_mblock_uptodate(struct fsblock_meta *mblock)
+{
+	struct fsblock *blk = mblock_block(mblock);
+	struct page *head = blk->page;
+
+	if (!fsblock_superpage(blk)) {
+		if (fsblock_midpage(blk))
+			SetPageUptodate(head);
+		/* XXX: could check for all subblocks uptodate */
+	} else {
+		struct page *pg;
+
+		for_each_page(head, fsblock_size(blk), pg) {
+			SetPageUptodate(pg);
+		} end_for_each_page;
+	}
+
+	set_bit(BL_uptodate, &blk->flags);
+}
+
+/*
+ * Mark a metadata block, and the page(s) backing it, dirty.
+ *
+ * Returns 1 if this call made the block dirty, 0 if BL_dirty was already
+ * set (in which case the pages are not touched).  A non-superpage block
+ * must already be uptodate.
+ */
+int mark_mblock_dirty(struct fsblock_meta *mblock)
+{
+	struct fsblock *blk;
+	struct page *head;
+
+	FSB_BUG_ON(!fsblock_superpage(&mblock->block) &&
+			!test_bit(BL_uptodate, &mblock->block.flags));
+
+	if (test_and_set_bit(BL_dirty, &mblock->block.flags))
+		return 0;
+
+	blk = mblock_block(mblock);
+	head = blk->page;
+	if (fsblock_superpage(blk)) {
+		struct page *pg;
+
+		for_each_page(head, fsblock_size(blk), pg) {
+			__set_page_dirty_noblocks(pg);
+		} end_for_each_page;
+	} else {
+		__set_page_dirty_noblocks(head);
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(mark_mblock_dirty);
+
+/*
+ * XXX: this is good, but is complex and inhibits block reclaim for now.
+ * Reworking so that it gets removed if the block is cleaned might be a
+ * good option? (would require a block flag)
+ */
+/* One association between a metadata block and an address_space. */
+struct mb_assoc {
+ /* entry on mapping->private_list (added under mapping->private_lock) */
+ struct list_head mlist;
+ /* the address_space this association belongs to */
+ struct address_space *mapping;
+
+ /*
+ * ring of all associations of the same block; block->private points
+ * at one member of the ring (manipulated under BL_assoc_lock)
+ */
+ struct list_head blist;
+ /* the metadata block this association refers to */
+ struct fsblock_meta *mblock;
+};
+
+/*
+ * Mark @mblock dirty and associate it with @inode's mapping.
+ *
+ * Unless the block's association ring already has an entry for
+ * inode->i_mapping, a new mb_assoc is allocated, linked into the ring and
+ * queued on mapping->private_list.  The ring is walked and modified under
+ * the BL_assoc_lock bit spinlock in block->flags; the private_list insert
+ * nests mapping->private_lock inside it.  If the allocation fails the
+ * block is written out with sync_block() instead (its result is ignored).
+ *
+ * Returns the value returned by mark_mblock_dirty().
+ */
+int mark_mblock_dirty_inode(struct fsblock_meta *mblock, struct inode *inode)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct fsblock *block = mblock_block(mblock);
+ struct mb_assoc *mba;
+ int ret;
+
+ ret = mark_mblock_dirty(mblock);
+
+ bit_spin_lock(BL_assoc_lock, &block->flags);
+ /* Walk the ring once; bail out if this mapping is already on it. */
+ if (block->private) {
+ mba = (struct mb_assoc *)block->private;
+ do {
+ FSB_BUG_ON(mba->mblock != mblock);
+ if (mba->mapping == inode->i_mapping)
+ goto out;
+ mba = list_entry(mba->blist.next,struct mb_assoc,blist);
+ } while (mba != block->private);
+ }
+ /* GFP_ATOMIC: allocating with the bit spinlock held */
+ mba = kmalloc(sizeof(struct mb_assoc), GFP_ATOMIC);
+ if (unlikely(!mba)) {
+ bit_spin_unlock(BL_assoc_lock, &block->flags);
+ sync_block(block);
+ return ret;
+ }
+ INIT_LIST_HEAD(&mba->mlist);
+ mba->mapping = mapping;
+ INIT_LIST_HEAD(&mba->blist);
+ mba->mblock = mblock;
+ /* Join the existing ring (if any) and become its entry point. */
+ if (block->private)
+ list_add(&mba->blist, ((struct mb_assoc *)block->private)->blist.prev);
+ block->private = mba;
+ spin_lock(&mapping->private_lock);
+ list_add_tail(&mba->mlist, &mapping->private_list);
+ spin_unlock(&mapping->private_lock);
+
+out:
+ bit_spin_unlock(BL_assoc_lock, &block->flags);
+ return ret;
+}
+EXPORT_SYMBOL(mark_mblock_dirty_inode);
+
+/*
+ * Write out and wait upon every metadata block associated with @mapping.
+ *
+ * mapping->private_list is spliced onto a local list.  A first pass
+ * detaches each association from its block's ring and submits the block
+ * with write_block() if it is dirty; a second pass walks the list
+ * backwards, waits for writeback, records BL_error as AS_EIO on the
+ * mapping, and frees the association.
+ *
+ * Returns 0, or the first error seen (a write_block() failure or -EIO).
+ *
+ * NOTE(review): write_block() asserts PageWriteback(block->page), and
+ * sync_block() calls set_block_writeback() and clears the page dirty state
+ * before calling it.  This path does neither -- confirm whether that is
+ * intended.
+ */
+int fsblock_sync(struct address_space *mapping)
+{
+ int err, ret;
+ LIST_HEAD(list);
+ struct mb_assoc *mba, *tmp;
+
+ spin_lock(&mapping->private_lock);
+ list_splice_init(&mapping->private_list, &list);
+ spin_unlock(&mapping->private_lock);
+
+ err = 0;
+ /* Pass 1: unhook each association and start the writes. */
+ list_for_each_entry_safe(mba, tmp, &list, mlist) {
+ struct fsblock *block = mblock_block(mba->mblock);
+
+ FSB_BUG_ON(mba->mapping != mapping);
+
+ bit_spin_lock(BL_assoc_lock, &block->flags);
+ if (list_empty(&mba->blist))
+ block->private = NULL;
+ else {
+ /* keep block->private pointing at a surviving ring member */
+ if (block->private == mba)
+ block->private = list_entry(mba->blist.next,struct mb_assoc,blist);
+ list_del(&mba->blist);
+ }
+ bit_spin_unlock(BL_assoc_lock, &block->flags);
+
+ iolock_block(block);
+ wait_on_block_writeback(block);
+ if (test_bit(BL_dirty, &block->flags)) {
+ FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+ clear_bit(BL_dirty, &block->flags);
+ ret = write_block(block);
+ if (ret && !err)
+ err = ret;
+ } else
+ iounlock_block(block);
+ }
+
+ /* Pass 2: wait for completion, collect errors, free associations. */
+ while (!list_empty(&list)) {
+ struct fsblock *block;
+
+ /* Go in reverse order to reduce context switching */
+ mba = list_entry(list.prev, struct mb_assoc, mlist);
+ list_del(&mba->mlist);
+
+ block = mblock_block(mba->mblock);
+ wait_on_block_writeback(block);
+ if (test_bit(BL_error, &block->flags)) {
+ if (!err)
+ err = -EIO;
+ set_bit(AS_EIO, &mba->mapping->flags);
+ }
+ kfree(mba);
+ }
+ return err;
+}
+EXPORT_SYMBOL(fsblock_sync);
+
+/*
+ * Drop every metadata-block association held by @mapping.
+ *
+ * Without @force the call refuses (returns 0) if any associated block is
+ * still dirty.  Otherwise all associations are taken off
+ * mapping->private_list, detached from their block's ring and freed; a
+ * block with BL_error set causes AS_EIO to be set on the association's
+ * mapping.  Returns 1 when the associations were released or there were
+ * none to begin with.
+ */
+int fsblock_release(struct address_space *mapping, int force)
+{
+	struct mb_assoc *assoc;
+	LIST_HEAD(doomed);
+
+	if (!mapping_has_private(mapping))
+		return 1;
+
+	spin_lock(&mapping->private_lock);
+	if (!force) {
+		list_for_each_entry(assoc, &mapping->private_list, mlist) {
+			struct fsblock *blk = mblock_block(assoc->mblock);
+
+			if (!test_bit(BL_dirty, &blk->flags))
+				continue;
+			spin_unlock(&mapping->private_lock);
+			return 0;
+		}
+	}
+	list_splice_init(&mapping->private_list, &doomed);
+	spin_unlock(&mapping->private_lock);
+
+	/* Tear the associations down from the tail of the private list. */
+	while (!list_empty(&doomed)) {
+		struct fsblock *blk;
+
+		assoc = list_entry(doomed.prev, struct mb_assoc, mlist);
+		list_del(&assoc->mlist);
+		blk = mblock_block(assoc->mblock);
+
+		bit_spin_lock(BL_assoc_lock, &blk->flags);
+		if (!list_empty(&assoc->blist)) {
+			/* keep blk->private aimed at a surviving ring member */
+			if (blk->private == assoc)
+				blk->private = list_entry(assoc->blist.next,
+						struct mb_assoc, blist);
+			list_del(&assoc->blist);
+		} else {
+			blk->private = NULL;
+		}
+		bit_spin_unlock(BL_assoc_lock, &blk->flags);
+
+		if (test_bit(BL_error, &blk->flags))
+			set_bit(AS_EIO, &assoc->mapping->flags);
+		kfree(assoc);
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(fsblock_release);
+
+/*
+ * Flush any metadata block cached in the block device's own mapping at
+ * the same block number as the (non-metadata) @block.
+ *
+ * If such a block is found it is written out with sync_block().  Should
+ * that fail, its dirty bit is cleared anyway and the block's iolock is
+ * waited upon.
+ *
+ * NOTE(review): the block returned by __find_get_block() is never passed
+ * to block_put() here -- confirm whether a reference is being leaked.
+ */
+static void sync_underlying_metadata(struct fsblock *block)
+{
+ struct address_space *mapping = block->page->mapping;
+ struct block_device *bdev = mapping_data_bdev(mapping);
+ struct fsblock *meta_block;
+ sector_t blocknr = block->block_nr;
+
+ /* XXX: should this just invalidate rather than write back? */
+
+ FSB_BUG_ON(test_bit(BL_metadata, &block->flags));
+
+ meta_block = __find_get_block(bdev->bd_inode->i_mapping, blocknr);
+ if (meta_block) {
+ int err;
+
+ FSB_BUG_ON(!test_bit(BL_metadata, &meta_block->flags));
+ /*
+ * Could actually do a memory copy here to bring
+ * the block uptodate. Probably not worthwhile.
+ */
+ FSB_BUG_ON(block == meta_block);
+ err = sync_block(meta_block);
+ if (!err)
+ FSB_BUG_ON(test_bit(BL_dirty, &meta_block->flags));
+ else {
+ /* write failed: drop the dirty state regardless */
+ clear_bit(BL_dirty, &meta_block->flags);
+ wait_on_block_iolock(meta_block);
+ }
+ }
+}
+
+/*
+ * Look up (or create) the metadata block @blocknr of @size bytes on @bdev
+ * and make sure its contents have been read in.
+ *
+ * Returns the metadata block on success.  On failure an ERR_PTR() is
+ * returned: whatever find_or_create_mblock() reported, the error from
+ * read_block(), or -EIO if the block is still not uptodate after the read
+ * completed.  On every failure after the lookup the block reference is
+ * dropped, since the caller has no block pointer left to put.
+ */
+struct fsblock_meta *mbread(struct block_device *bdev, sector_t blocknr, unsigned int size)
+{
+	struct fsblock_meta *mblock;
+	struct fsblock *block;
+	int ret = 0;
+
+	mblock = find_or_create_mblock(bdev, blocknr, size);
+	if (IS_ERR(mblock))
+		return mblock;
+
+	block = &mblock->block;
+	if (test_bit(BL_uptodate, &block->flags))
+		return mblock;
+
+	iolock_block(block);
+	/* Recheck under the iolock: someone else may have read it in. */
+	if (!test_bit(BL_uptodate, &block->flags)) {
+		FSB_BUG_ON(PageWriteback(block->page));
+		FSB_BUG_ON(test_bit(BL_dirty, &block->flags));
+		set_block_sync_io(block);
+		ret = read_block(block);
+		if (!ret) {
+			wait_on_block_sync_io(block);
+			if (!test_bit(BL_uptodate, &block->flags)) {
+				ret = -EIO;
+			} else {
+				/* only meaningful once the read succeeded */
+				FSB_BUG_ON(size >= PAGE_CACHE_SIZE &&
+						!PageUptodate(block->page));
+			}
+		}
+		/* XXX: handle errors properly */
+	}
+	iounlock_block(block);
+
+	if (ret) {
+		/* drop the reference only after we are done with the block */
+		block_put(block);
+		return ERR_PTR(ret);
+	}
+
+	return mblock;
+}
+EXPORT_SYMBOL(mbread);
+
+/*
+ * XXX: maybe either don't have a generic version, or change the
+ * insert_mapping scheme so that it fills fsblocks rather than inserts them
+ * live into pages?
+ */
+/*
+ * Translate file block @blocknr of @mapping into a device block number.
+ *
+ * If no fsblock is cached for @blocknr, the page range is created and
+ * locked, unmapped blocks are attached, and @insert_mapping is asked to
+ * map them.  Returns the block's block_nr if it ends up BL_mapped, else 0
+ * (also returned when the pages or blocks cannot be set up).
+ *
+ * NOTE(review): the other callers in this file pass insert_mapping() a
+ * byte offset (page->index << PAGE_CACHE_SHIFT), whereas a page index is
+ * passed here, and the return value is ignored -- confirm.
+ * NOTE(review): the block from __find_get_block() is never put -- confirm.
+ */
+sector_t fsblock_bmap(struct address_space *mapping, sector_t blocknr, insert_mapping_fn *insert_mapping)
+{
+	struct fsblock *block;
+	struct inode *inode = mapping->host;
+	sector_t ret;
+
+	block = __find_get_block(mapping, blocknr);
+	if (!block) {
+		pgoff_t pgoff = sector_pgoff(blocknr, inode->i_blkbits);
+		unsigned int size = 1 << inode->i_blkbits;
+		struct page *page;
+
+		page = create_lock_page_range(mapping, pgoff, size);
+		if (!page)
+			return 0;
+
+		if (create_unmapped_blocks(page, GFP_NOFS, size, CREATE_METADATA)) {
+			/* do not return with the page range still locked */
+			unlock_page_range(page, size);
+			return 0;
+		}
+
+		ret = insert_mapping(mapping, pgoff, PAGE_CACHE_SIZE, 0);
+
+		block = __find_get_block(mapping, blocknr);
+		FSB_BUG_ON(!block);
+
+		unlock_page_range(page, size);
+	}
+
+	FSB_BUG_ON(test_bit(BL_new, &block->flags));
+	ret = 0;
+	if (test_bit(BL_mapped, &block->flags))
+		ret = block->block_nr;
+
+	return ret;
+}
+EXPORT_SYMBOL(fsblock_bmap);
+
+/*
+ * Trade the lock on a single page for locks on the whole page range of
+ * the superpage block containing it.
+ *
+ * *pagep is unlocked, then create_lock_page_range() is called for @size
+ * bytes starting at first_page_idx(page->index, size) and its result is
+ * stored in *pagep.
+ *
+ * Returns:
+ *   0                  - range locked, *pagep is the returned range page
+ *   -ENOMEM            - range could not be set up; the original page has
+ *                        been locked again and *pagep is NULL
+ *   AOP_TRUNCATED_PAGE - find_page() no longer returns the original page
+ *                        at its index; the range has been unlocked and the
+ *                        original page is left unlocked
+ */
+static int relock_superpage_block(struct page **pagep, unsigned int size)
+{
+ struct page *page = *pagep;
+ pgoff_t index = page->index;
+ pgoff_t first = first_page_idx(page->index, size);
+ struct address_space *mapping = page->mapping;
+
+ /*
+ * XXX: this is a bit of a hack because the ->readpage and other
+ * aops APIs are not so nice. Should convert over to a ->read_range
+ * API that does the offset, length thing and allows caller locking?
+ * (also getting rid of ->readpages).
+ */
+ unlock_page(page);
+ *pagep = create_lock_page_range(mapping, first, size);
+ if (!*pagep) {
+ lock_page(page);
+ return -ENOMEM;
+ }
+ /* mapping and index were sampled while the page was still locked */
+ if (page != find_page(mapping, index)) {
+ unlock_page_range(*pagep, size);
+ return AOP_TRUNCATED_PAGE;
+ }
+ return 0;
+}
+
+/*
+ * Prepare one block of @page for reading.
+ *
+ * Returns 1 if the block has been flagged BL_readin and must be submitted
+ * by the caller.  Returns 0 if nothing needs to be read: the block was
+ * already uptodate, or it is unmapped, in which case its part of the page
+ * is zero-filled and the block is marked uptodate.
+ */
+static int block_read_helper(struct page *page, struct fsblock *block)
+{
+	FSB_BUG_ON(test_bit(BL_new, &block->flags));
+
+	if (test_bit(BL_uptodate, &block->flags))
+		return 0;
+
+	FSB_BUG_ON(PageUptodate(page));
+
+	if (test_bit(BL_mapped, &block->flags)) {
+		/* the uptodate bit is sampled a second time here */
+		if (test_bit(BL_uptodate, &block->flags))
+			return 0;
+
+		FSB_BUG_ON(test_bit(BL_readin, &block->flags));
+		FSB_BUG_ON(test_bit(BL_writeback, &block->flags));
+		set_bit(BL_readin, &block->flags);
+		return 1;
+	} else {
+		/* unmapped block: nothing on disk, present zeroes */
+		unsigned int len = fsblock_size(block);
+		unsigned int start = block_page_offset(block, len);
+
+		zero_user_page(page, start, len, KM_USER0);
+		set_bit(BL_uptodate, &block->flags);
+		return 0;
+	}
+}
+
+/*
+ * Read a locked, not-uptodate page in through its fsblocks.
+ *
+ * Blocks are created for the page if it has none, @insert_mapping is
+ * called to map them, and every block that block_read_helper() flags (or,
+ * for a superpage block, the single block if it is mapped) is submitted
+ * for reading.  Unmapped blocks are zero-filled instead.
+ *
+ * Returns 0 when reads were submitted or the page(s) could be completed
+ * without I/O; otherwise the error from relock_superpage_block(),
+ * create_unmapped_blocks(), insert_mapping() or the submission.  Paths
+ * that go through out_unlock unlock the page range here; paths that
+ * submit I/O return without unlocking.
+ */
+int fsblock_read_page(struct page *page, insert_mapping_fn *insert_mapping)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ unsigned int size = 1 << inode->i_blkbits;
+ struct fsblock *block;
+ int ret;
+
+ FSB_BUG_ON(!PageLocked(page));
+ FSB_BUG_ON(PageUptodate(page));
+ FSB_BUG_ON(PageWriteback(page));
+
+ if (size_is_superpage(size)) {
+ struct page *orig_page = page;
+
+ /* from here on, page is the range page from relocking */
+ ret = relock_superpage_block(&page, size);
+ if (ret)
+ return ret;
+ /* the page may have been filled while it was unlocked; ret is 0 */
+ if (PageUptodate(orig_page))
+ goto out_unlock;
+ }
+
+ if (!PageBlocks(page)) {
+ ret = create_unmapped_blocks(page, GFP_NOFS, size, 0);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /* XXX: optimise away if page is mapped to disk */
+ ret = insert_mapping(mapping, page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 0);
+ /* XXX: SetPageError on failure? */
+ if (ret)
+ goto out_unlock;
+
+ block = page_blocks(page);
+
+ if (!fsblock_superpage(block)) {
+
+ if (fsblock_subpage(block)) {
+ int nr = 0;
+ struct fsblock *b;
+ /* first flag everything that needs reading ... */
+ for_each_block(block, b)
+ nr += block_read_helper(page, b);
+ if (nr == 0) {
+ /* Hole? */
+ SetPageUptodate(page);
+ goto out_unlock;
+ }
+ /* ... then submit the flagged blocks */
+ for_each_block(block, b) {
+ if (!test_bit(BL_readin, &b->flags))
+ continue;
+
+ ret = submit_block(b, READ);
+ if (ret)
+ goto out_unlock;
+ /*
+ * XXX: must handle errors properly (eg. wait
+ * for outstanding reads before unlocking the
+ * page?
+ */
+ }
+ } else {
+ if (block_read_helper(page, block)) {
+ ret = submit_block(block, READ);
+ if (ret)
+ goto out_unlock;
+ } else {
+ SetPageUptodate(page);
+ goto out_unlock;
+ }
+ }
+ } else {
+ struct page *p;
+
+ ret = 0;
+
+ FSB_BUG_ON(test_bit(BL_new, &block->flags));
+ FSB_BUG_ON(test_bit(BL_uptodate, &block->flags));
+ FSB_BUG_ON(test_bit(BL_dirty, &block->flags));
+
+ if (!test_bit(BL_mapped, &block->flags)) {
+ /* unmapped superpage block: zero and unlock each page here */
+ for_each_page(page, size, p) {
+ FSB_BUG_ON(PageUptodate(p));
+ zero_user_page(p, 0, PAGE_CACHE_SIZE, KM_USER0);
+ SetPageUptodate(p);
+ unlock_page(p);
+ } end_for_each_page;
+ set_bit(BL_uptodate, &block->flags);
+ } else {
+ ret = read_block(block);
+ if (ret)
+ goto out_unlock;
+ }
+ }
+ FSB_BUG_ON(ret);
+ return 0;
+
+out_unlock:
+ unlock_page_range(page, size);
+ return ret;
+}
+EXPORT_SYMBOL(fsblock_read_page);
+
+/*
+ * Prepare one mapped block of @page for writeout.
+ *
+ * A BL_new block first has any aliasing metadata flushed via
+ * sync_underlying_metadata() and is then turned into a dirty block.
+ * Returns 1 if the block was dirty: its dirty bit has been cleared,
+ * BL_writeback set, and the caller must submit it.  Returns 0 if there is
+ * nothing to write.
+ */
+static int block_write_helper(struct page *page, struct fsblock *block)
+{
+	FSB_BUG_ON(!test_bit(BL_mapped, &block->flags));
+
+	if (test_bit(BL_new, &block->flags)) {
+		sync_underlying_metadata(block);
+		clear_bit(BL_new, &block->flags);
+		set_bit(BL_dirty, &block->flags);
+	}
+
+	if (!test_bit(BL_dirty, &block->flags))
+		return 0;
+
+	/*
+	 * XXX: Careful of ordering between clear buffer / page dirty
+	 * and set buffer / page dirty
+	 */
+	FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+	clear_bit(BL_dirty, &block->flags);
+	FSB_BUG_ON(test_bit(BL_readin, &block->flags));
+	FSB_BUG_ON(test_bit(BL_writeback, &block->flags));
+	set_bit(BL_writeback, &block->flags);
+
+	return 1;
+}
+
+/*
+ * Write a locked page (not under writeback) out through its fsblocks.
+ *
+ * @insert_mapping is called to map the page's blocks for writing, the
+ * page(s) are marked as under writeback and unlocked, and every block
+ * flagged by block_write_helper() (or the single superpage block) is
+ * submitted.
+ *
+ * Returns 0 on success or the first error encountered.  Errors detected
+ * while the page range is still locked go through out_unlock.  Errors
+ * from submitting I/O are returned directly, because the page(s) have
+ * already been unlocked by then and must not be unlocked a second time.
+ */
+/* XXX: must obey non-blocking writeout! */
+int fsblock_write_page(struct page *page, insert_mapping_fn *insert_mapping,
+		struct writeback_control *wbc)
+{
+	struct address_space *mapping = page->mapping;
+	unsigned int size = 1 << mapping->host->i_blkbits;
+	struct fsblock *block;
+	int ret;
+
+	FSB_BUG_ON(!PageLocked(page));
+	FSB_BUG_ON(PageWriteback(page));
+
+	if (size_is_superpage(size)) {
+		struct page *orig_page = page;
+
+		/* keep the page dirty while its lock is dropped for relocking */
+		redirty_page_for_writepage(wbc, orig_page);
+		ret = relock_superpage_block(&page, size);
+		if (ret)
+			return ret;
+		if (!clear_page_dirty_for_io(orig_page))
+			goto out_unlock;
+	}
+
+	if (!PageBlocks(page)) {
+		FSB_BUG(); /* XXX: should always have blocks here */
+		FSB_BUG_ON(!PageUptodate(page));
+		/* XXX: should rework (eg use page_mkwrite) so as to always
+		 * have blocks by this stage!!! */
+		ret = create_unmapped_blocks(page, GFP_NOFS, size, CREATE_DIRTY);
+		if (ret)
+			goto out_unlock;
+	}
+
+	ret = insert_mapping(mapping, page->index << PAGE_CACHE_SHIFT,
+			PAGE_CACHE_SIZE, 1);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * XXX: todo - i_size handling ... should it be here?!?
+	 * No - I would prefer partial page zeroing to go in filemap_nopage
+	 * and tolerate writing of crap past EOF in filesystems -- no
+	 * other sane way to do it other than invalidating a partial page
+	 * before zeroing before writing it out in order that we can
+	 * guarantee it isn't touched after zeroing.
+	 */
+
+	block = page_blocks(page);
+
+	if (!fsblock_superpage(block)) {
+
+		if (fsblock_subpage(block)) {
+			int nr = 0;
+			struct fsblock *b;
+
+			for_each_block(block, b)
+				nr += block_write_helper(page, b);
+			/* XXX: technically could happen (see set_page_dirty_blocks) */
+			FSB_BUG_ON(nr == 0);
+			if (nr == 0)
+				goto out_unlock;
+
+			FSB_BUG_ON(PageWriteback(page));
+			set_page_writeback(page);
+			unlock_page(page);
+			for_each_block(block, b) {
+				if (!test_bit(BL_writeback, &b->flags))
+					continue;
+				ret = submit_block(b, WRITE);
+				/* page is already unlocked: bypass out_unlock */
+				if (ret)
+					return ret;
+				/* XXX: error handling */
+			}
+		} else {
+			if (block_write_helper(page, block)) {
+				FSB_BUG_ON(PageWriteback(page));
+				set_page_writeback(page);
+				unlock_page(page);
+				ret = submit_block(block, WRITE);
+				/* page is already unlocked: bypass out_unlock */
+				if (ret)
+					return ret;
+			} else {
+				FSB_BUG(); /* XXX: see above */
+				goto out_unlock;
+			}
+		}
+	} else {
+		struct page *p;
+
+		FSB_BUG_ON(!test_bit(BL_mapped, &block->flags));
+		FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+		FSB_BUG_ON(!test_bit(BL_dirty, &block->flags));
+		FSB_BUG_ON(test_bit(BL_new, &block->flags));
+
+		for_each_page(page, size, p) {
+			FSB_BUG_ON(page_blocks(p) != block);
+			FSB_BUG_ON(!PageUptodate(p));
+		} end_for_each_page;
+
+		for_each_page(page, size, p) {
+			clear_page_dirty_for_io(p);
+			FSB_BUG_ON(PageWriteback(p));
+			FSB_BUG_ON(!PageUptodate(p));
+			set_page_writeback(p);
+			unlock_page(p);
+		} end_for_each_page;
+
+		/* XXX: recheck ordering here! don't want to lose dirty bits */
+
+		clear_bit(BL_dirty, &block->flags);
+		ret = write_block(block);
+		/* every page of the range was unlocked above */
+		if (ret)
+			return ret;
+	}
+	FSB_BUG_ON(ret);
+	return 0;
+
+out_unlock:
+	unlock_page_range(page, size);
+	return ret;
+}
+EXPORT_SYMBOL(fsblock_write_page);
+
+/*
+ * Prepare one mapped block of @page for being written to by the caller.
+ *
+ * Returns 0 if the block needs no read: it is already uptodate, or it is
+ * BL_new, in which case its part of the page is zeroed, aliasing metadata
+ * is flushed, and the block and page are marked dirty.  Returns 1 if the
+ * block is neither and so still has to be read in before a partial write.
+ */
+static int block_dirty_helper(struct page *page, struct fsblock *block)
+{
+	unsigned int len, start;
+
+	FSB_BUG_ON(!test_bit(BL_mapped, &block->flags));
+
+	if (test_bit(BL_uptodate, &block->flags))
+		return 0;
+
+	FSB_BUG_ON(PageUptodate(page));
+
+	if (!test_bit(BL_new, &block->flags))
+		return 1;
+
+	/* freshly allocated block: start from zeroes rather than from disk */
+	len = fsblock_size(block);
+	start = block_page_offset(block, len);
+	zero_user_page(page, start, len, KM_USER0);
+	set_bit(BL_uptodate, &block->flags);
+	sync_underlying_metadata(block);
+	clear_bit(BL_new, &block->flags);
+	set_bit(BL_dirty, &block->flags);
+	__set_page_dirty_noblocks(page);
+
+	return 0;
+}
+
+/*
+ * prepare_write for block sizes larger than a page.
+ *
+ * The lock on @orig_page is traded for locks on the whole block's page
+ * range.  On success (0) the range is left locked.  If the failure comes
+ * from relock_superpage_block() its return value is passed through as-is;
+ * on later failures the range is unlocked and @orig_page is locked again
+ * before the error is returned.
+ *
+ * A BL_new block has its not-uptodate pages zeroed and all of its pages
+ * dirtied; a block that is not uptodate is read in synchronously.
+ * @from and @to are not used here.
+ */
+static int fsblock_prepare_write_super(struct page *orig_page,
+ unsigned int size, unsigned from, unsigned to,
+ insert_mapping_fn *insert_mapping)
+{
+ struct address_space *mapping = orig_page->mapping;
+ struct fsblock *block;
+ struct page *page = orig_page, *p;
+ int ret;
+
+ ret = relock_superpage_block(&page, size);
+ if (ret)
+ return ret;
+
+ FSB_BUG_ON(PageBlocks(page) != PageBlocks(orig_page));
+ if (!PageBlocks(page)) {
+ FSB_BUG_ON(PageDirty(orig_page));
+ FSB_BUG_ON(PageDirty(page));
+ ret = create_unmapped_blocks(page, GFP_NOFS, size, 0);
+ if (ret)
+ goto out_unlock;
+ }
+ FSB_BUG_ON(!PageBlocks(page));
+
+ ret = insert_mapping(mapping, page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 1);
+ if (ret)
+ goto out_unlock;
+
+ block = page_blocks(page);
+
+ if (test_bit(BL_new, &block->flags)) {
+ /* new block: no read needed, present zeroes and dirty everything */
+ for_each_page(page, size, p) {
+ if (!PageUptodate(p)) {
+ FSB_BUG_ON(PageDirty(p));
+ zero_user_page(p, 0, PAGE_CACHE_SIZE, KM_USER0);
+ SetPageUptodate(p);
+ }
+ __set_page_dirty_noblocks(p);
+ } end_for_each_page;
+
+ set_bit(BL_uptodate, &block->flags);
+ sync_underlying_metadata(block);
+ clear_bit(BL_new, &block->flags);
+ set_bit(BL_dirty, &block->flags);
+ } else if (!test_bit(BL_uptodate, &block->flags)) {
+ FSB_BUG_ON(test_bit(BL_dirty, &block->flags));
+
+ /* existing block: read it in and wait for the read */
+ set_block_sync_io(block);
+ ret = read_block(block);
+ if (ret)
+ goto out_unlock;
+ wait_on_block_sync_io(block);
+ if (!test_bit(BL_uptodate, &block->flags)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+ }
+
+ return 0;
+
+out_unlock:
+ unlock_page_range(page, size);
+ lock_page(orig_page);
+ FSB_BUG_ON(!ret);
+ return ret;
+}
+
+/*
+ * Get a locked page ready for a write covering bytes [@from, @to).
+ *
+ * Superpage block sizes are handed off to fsblock_prepare_write_super().
+ * Otherwise blocks are created if the page has none, @insert_mapping maps
+ * them for writing, and each block is run through block_dirty_helper().
+ * If the page is still not uptodate and the write does not cover the
+ * whole page, every block that is not uptodate is read in synchronously.
+ *
+ * Returns 0 on success or an error from block creation, mapping or the
+ * reads (-EIO if a block is not uptodate after its read).  The
+ * non-superpage path never unlocks the page.
+ */
+int fsblock_prepare_write(struct page *page, unsigned from, unsigned to,
+ insert_mapping_fn *insert_mapping)
+{
+ struct address_space *mapping = page->mapping;
+ unsigned int size = 1 << mapping->host->i_blkbits;
+ struct fsblock *block;
+ int ret, nr;
+
+ FSB_BUG_ON(!PageLocked(page));
+ FSB_BUG_ON(from > PAGE_CACHE_SIZE);
+ FSB_BUG_ON(to > PAGE_CACHE_SIZE);
+ FSB_BUG_ON(from > to);
+
+ if (size_is_superpage(size))
+ return fsblock_prepare_write_super(page, size, from, to, insert_mapping);
+
+ if (!PageBlocks(page)) {
+ ret = create_unmapped_blocks(page, GFP_NOFS, size, 0);
+ if (ret)
+ return ret;
+ }
+
+ ret = insert_mapping(mapping, page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 1);
+ if (ret)
+ return ret;
+
+ block = page_blocks(page);
+
+ /* nr counts the blocks that would still need reading from disk */
+ nr = 0;
+ if (fsblock_subpage(block)) {
+ struct fsblock *b;
+ for_each_block(block, b)
+ nr += block_dirty_helper(page, b);
+ } else
+ nr += block_dirty_helper(page, block);
+ if (nr == 0)
+ SetPageUptodate(page);
+
+ if (PageUptodate(page))
+ return 0;
+
+ /* a full-page overwrite does not need the old contents */
+ if (to - from == PAGE_CACHE_SIZE)
+ return 0;
+
+ /*
+ * XXX: this is stupid, could do better with write_begin aops, or
+ * just zero out unwritten partial blocks.
+ */
+ if (fsblock_subpage(block)) {
+ struct fsblock *b;
+ /* submit all reads first ... */
+ for_each_block(block, b) {
+ if (test_bit(BL_uptodate, &b->flags))
+ continue;
+ set_block_sync_io(b);
+ ret = read_block(b);
+ if (ret)
+ break;
+ }
+
+ /* ... then wait on every block, keeping the first error */
+ for_each_block(block, b) {
+ wait_on_block_sync_io(b);
+ if (!ret && !test_bit(BL_uptodate, &b->flags))
+ ret = -EIO;
+ }
+ if (ret)
+ return ret;
+ } else {
+
+ FSB_BUG_ON(test_bit(BL_uptodate, &block->flags));
+ set_block_sync_io(block);
+ ret = read_block(block);
+ if (ret)
+ return ret;
+ wait_on_block_sync_io(block);
+ if (test_bit(BL_error, &block->flags))
+ SetPageError(page);
+ if (!test_bit(BL_uptodate, &block->flags))
+ return -EIO;
+ }
+ SetPageUptodate(page);
+
+ return 0;
+}
+EXPORT_SYMBOL(fsblock_prepare_write);
+
+/*
+ * commit_write for a block larger than a page.
+ *
+ * Marks the (already uptodate) block and every page it spans dirty, then
+ * drops the locks on the whole page range and takes the lock on
+ * @orig_page again.  @from and @to are unused.  Always returns 0.
+ */
+static int __fsblock_commit_write_super(struct page *orig_page,
+		struct fsblock *block, unsigned from, unsigned to)
+{
+	unsigned int bytes = fsblock_size(block);
+	struct page *head, *pg;
+
+	FSB_BUG_ON(!test_bit(BL_uptodate, &block->flags));
+	set_bit(BL_dirty, &block->flags);
+
+	head = block->page;
+	for_each_page(head, bytes, pg) {
+		FSB_BUG_ON(!PageUptodate(pg));
+		__set_page_dirty_noblocks(pg);
+	} end_for_each_page;
+
+	/* hand the caller back just the page it originally had locked */
+	unlock_page_range(head, bytes);
+	lock_page(orig_page);
+
+	return 0;
+}
+
+/*
+ * commit_write for a page that holds several sub-page blocks.
+ *
+ * Every block on the page is marked dirty.  When the write covered the
+ * whole page, every block and the page itself are also marked uptodate;
+ * for a partial write they are asserted to be uptodate already.
+ * Always returns 0.
+ */
+static int __fsblock_commit_write_sub(struct page *page,
+		struct fsblock *block, unsigned from, unsigned to)
+{
+	const int partial = (to - from < PAGE_CACHE_SIZE);
+	struct fsblock *b;
+
+	for_each_block(block, b) {
+		if (partial) {
+			FSB_BUG_ON(!test_bit(BL_uptodate, &b->flags));
+		} else {
+			/* mark the block being visited, not just the head block */
+			set_bit(BL_uptodate, &b->flags);
+		}
+		if (!test_bit(BL_dirty, &b->flags))
+			set_bit(BL_dirty, &b->flags);
+	}
+
+	if (partial) {
+		FSB_BUG_ON(!PageUptodate(page));
+	} else {
+		SetPageUptodate(page);
+	}
+	__set_page_dirty_noblocks(page);
+
+	return 0;
+}
+
+/*
+ * Finish a write of bytes [@from, @to) to a locked page that has blocks.
+ *
+ * Superpage and subpage blocks are handed to their dedicated helpers.
+ * For a block that matches the page, a full-page write marks the block
+ * and page uptodate (a partial write asserts they already are), and then
+ * both block and page are marked dirty.  Does not touch i_size.
+ * Returns 0, or whatever the helper returns.
+ */
+int __fsblock_commit_write(struct page *page, unsigned from, unsigned to)
+{
+	struct fsblock *blk;
+
+	FSB_BUG_ON(!PageLocked(page));
+	FSB_BUG_ON(from > PAGE_CACHE_SIZE);
+	FSB_BUG_ON(to > PAGE_CACHE_SIZE);
+	FSB_BUG_ON(from > to);
+	FSB_BUG_ON(!PageBlocks(page));
+
+	blk = page_blocks(page);
+	FSB_BUG_ON(!test_bit(BL_mapped, &blk->flags));
+
+	if (fsblock_superpage(blk))
+		return __fsblock_commit_write_super(page, blk, from, to);
+	if (fsblock_subpage(blk))
+		return __fsblock_commit_write_sub(page, blk, from, to);
+
+	if (to - from >= PAGE_CACHE_SIZE) {
+		/* whole page overwritten: contents are now valid */
+		set_bit(BL_uptodate, &blk->flags);
+		SetPageUptodate(page);
+	} else {
+		FSB_BUG_ON(!PageUptodate(page));
+		FSB_BUG_ON(!test_bit(BL_uptodate, &blk->flags));
+	}
+
+	if (!test_bit(BL_dirty, &blk->flags))
+		set_bit(BL_dirty, &blk->flags);
+	__set_page_dirty_noblocks(page);
+
+	return 0;
+}
+EXPORT_SYMBOL(__fsblock_commit_write);
+
+/*
+ * commit_write entry point: commit the blocks via
+ * __fsblock_commit_write() and, on success, extend the inode's i_size if
+ * the write ended beyond it.  @file is unused.
+ * Returns the result of __fsblock_commit_write().
+ */
+int fsblock_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	int err;
+
+	err = __fsblock_commit_write(page, from, to);
+	if (err)
+		return err;
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 */
+	if (end > inode->i_size) {
+		i_size_write(inode, end);
+		mark_inode_dirty(inode);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(fsblock_commit_write);
+
+/* XXX: this is racy I think (must verify versus page_mkclean). Must have
+ * some operation to pin a page's metadata while dirtying it. (this will
+ * fix get_user_pages for dirty as well once callers are converted).
+ */
+/*
+ * page_mkwrite helper: lock @page and run a whole-page prepare_write /
+ * __fsblock_commit_write() cycle on it so that its blocks exist and are
+ * dirty before the page is written to.  @vma is unused.
+ *
+ * Returns 0 on success or if the page no longer has a mapping; otherwise
+ * the error from prepare_write or __fsblock_commit_write().
+ *
+ * NOTE(review): for superpage block sizes fsblock_prepare_write_super()
+ * can return AOP_TRUNCATED_PAGE with the page left unlocked (see
+ * relock_superpage_block()).  If a_ops->prepare_write ends up there, the
+ * unlock_page() below would run on an unlocked page -- confirm.
+ */
+int fsblock_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ struct address_space *mapping;
+ const struct address_space_operations *a_ops;
+ int ret = 0;
+
+ lock_page(page);
+ mapping = page->mapping;
+ if (!mapping) {
+ /* Caller will take care of it */
+ goto out;
+ }
+ a_ops = mapping->a_ops;
+
+ /* XXX: don't instantiate blocks past isize! (same for truncate?) */
+ ret = a_ops->prepare_write(NULL, page, 0, PAGE_CACHE_SIZE);
+ if (ret == 0)
+ ret = __fsblock_commit_write(page, 0, PAGE_CACHE_SIZE);
+out:
+ unlock_page(page);
+
+ return ret;
+}
+EXPORT_SYMBOL(fsblock_page_mkwrite);
+
+/*
+ * Zero the tail of the block containing byte offset @from, for block
+ * sizes larger than a page.
+ *
+ * Nothing is done if @from is block aligned.  Otherwise each page from
+ * the one containing @from to the end of the block is grabbed, prepared
+ * with a_ops->prepare_write, zeroed from the relevant offset to the end
+ * of the page, and committed with __fsblock_commit_write().
+ *
+ * Returns 0 on success, -ENOMEM if a page cannot be grabbed, or the first
+ * error from prepare_write / commit.
+ *
+ * NOTE(review): the page is unlocked unconditionally after prepare_write;
+ * if prepare_write can return with the page already unlocked (e.g.
+ * AOP_TRUNCATED_PAGE from relock_superpage_block()) this would be a
+ * double unlock -- confirm.
+ */
+static int fsblock_truncate_page_super(struct address_space *mapping, loff_t from)
+{
+ pgoff_t index;
+ unsigned offset;
+ const struct address_space_operations *a_ops = mapping->a_ops;
+ unsigned int size = 1 << mapping->host->i_blkbits;
+ unsigned int nr_pages;
+ unsigned int length;
+ int i, err;
+
+ /* length: how far @from lies into its block */
+ length = from & (size - 1);
+ if (length == 0)
+ return 0;
+
+ /* pages needed to cover the remaining size - length bytes, rounded up */
+ nr_pages = ((size - length + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT);
+ index = from >> PAGE_CACHE_SHIFT;
+ offset = from & (PAGE_CACHE_SIZE-1);
+
+ err = 0;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page;
+
+ page = grab_cache_page(mapping, index + i);
+ if (!page) {
+ err = -ENOMEM;
+ break;
+ }
+
+ err = a_ops->prepare_write(NULL, page, offset, PAGE_CACHE_SIZE);
+ if (!err) {
+ FSB_BUG_ON(!PageBlocks(page));
+ zero_user_page(page, offset, PAGE_CACHE_SIZE-offset, KM_USER0);
+ err = __fsblock_commit_write(page, offset, PAGE_CACHE_SIZE);
+ }
+
+ unlock_page(page);
+ page_cache_release(page);
+ if (err)
+ break;
+ /* only the first page starts part-way in */
+ offset = 0;
+ }
+ return err;
+}
+
+/*
+ * Zero the part of the page that follows byte offset @from, so that no
+ * stale data is left behind @from in its block.
+ *
+ * Superpage block sizes are delegated to fsblock_truncate_page_super().
+ * Nothing is done when @from is block aligned.  Returns 0 on success,
+ * -ENOMEM if the page cannot be grabbed, or the error from
+ * prepare_write / __fsblock_commit_write().
+ */
+int fsblock_truncate_page(struct address_space *mapping, loff_t from)
+{
+	const struct address_space_operations *a_ops = mapping->a_ops;
+	unsigned int size = 1 << mapping->host->i_blkbits;
+	unsigned int into_block;
+	struct page *page;
+	pgoff_t index;
+	unsigned offset;
+	int err;
+
+	if (size_is_superpage(size))
+		return fsblock_truncate_page_super(mapping, from);
+
+	into_block = from & (size - 1);
+	if (into_block == 0)
+		return 0;
+
+	index = from >> PAGE_CACHE_SHIFT;
+	offset = from & (PAGE_CACHE_SIZE-1);
+
+	page = grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+
+	err = a_ops->prepare_write(NULL, page, offset, PAGE_CACHE_SIZE);
+	if (err == 0) {
+		zero_user_page(page, offset, PAGE_CACHE_SIZE-offset, KM_USER0);
+		/*
+		 * a_ops->commit_write would extend i_size :( Have to assume
+		 * caller uses fsblock_prepare_write.
+		 */
+		err = __fsblock_commit_write(page, offset, PAGE_CACHE_SIZE);
+	}
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	return err;
+}
+EXPORT_SYMBOL(fsblock_truncate_page);
+
+/*
+ * A block may be freed only when its count is exactly 1, it is not
+ * dirty, and it has no private data attached.  Returns 1 if so, else 0.
+ */
+static int can_free_block(struct fsblock *block)
+{
+	if (atomic_read(&block->count) != 1)
+		return 0;
+	if (test_bit(BL_dirty, &block->flags))
+		return 0;
+
+	return !block->private;
+}
+
+/*
+ * Try to drop the final count on a block the caller has locked.
+ *
+ * If the block looks freeable its count is decremented; the attempt only
+ * succeeds (returns 1) when that takes the count to zero and the block is
+ * still clean, otherwise the count is restored and 0 is returned.  The
+ * block is unlocked in every case.
+ */
+static int __try_to_free_block(struct fsblock *block)
+{
+	int freed = 0;
+
+	if (can_free_block(block)) {
+		if (atomic_dec_and_test(&block->count) &&
+				!test_bit(BL_dirty, &block->flags))
+			freed = 1;
+		else
+			atomic_inc(&block->count);	/* undo the decrement */
+	}
+
+	unlock_block(block);
+
+	return freed;
+}
+
+/*
+ * Attempt to free @block without blocking: returns 0 straight away if the
+ * block lock cannot be taken with trylock_block(), otherwise the result
+ * of __try_to_free_block() (which also releases the lock).
+ */
+static int try_to_free_block(struct fsblock *block)
+{
+	/*
+	 * XXX: get rid of block locking from here and invalidate_block --
+	 * use page lock instead?
+	 */
+	if (!trylock_block(block))
+		return 0;
+
+	return __try_to_free_block(block);
+}
+
+/*
+ * Try to free the superpage block that @orig_page belongs to.
+ *
+ * @orig_page must be locked and have blocks.  Unless @all_locked is set,
+ * the other pages of the block are trylocked here and unlocked again
+ * before returning.  The block is freed only if none of the pages is
+ * dirty or under writeback and try_to_free_block() succeeds; the block
+ * attachment is then cleared from every page.
+ *
+ * Returns 1 if the block was freed, 0 otherwise.
+ */
+static int try_to_free_blocks_super(struct page *orig_page, int all_locked)
+{
+ unsigned int size;
+ struct fsblock *block;
+ struct page *page, *p;
+ int i;
+ int ret = 0;
+
+ FSB_BUG_ON(!PageLocked(orig_page));
+ FSB_BUG_ON(!PageBlocks(orig_page));
+
+ if (PageDirty(orig_page) || PageWriteback(orig_page))
+ return ret;
+
+ block = page_blocks(orig_page);
+ page = block->page;
+ size = fsblock_size(block);
+
+ /* i counts the pages locked here, so exactly those are unlocked at out */
+ i = 0;
+ if (!all_locked) {
+ for_each_page(page, size, p) {
+ if (p != orig_page) {
+ if (TestSetPageLocked(p))
+ goto out;
+ i++;
+ if (PageWriteback(p))
+ goto out;
+ }
+ } end_for_each_page;
+ }
+
+ assert_block(block);
+
+ if (!can_free_block(block))
+ goto out;
+ if (!try_to_free_block(block))
+ goto out;
+
+ for_each_page(page, size, p) {
+ FSB_BUG_ON(!PageLocked(p));
+ FSB_BUG_ON(!PageBlocks(p));
+ FSB_BUG_ON(PageWriteback(p));
+ clear_page_blocks(p);
+ } end_for_each_page;
+
+ free_block(block);
+
+ ret = 1;
+
+out:
+ /* release the first i non-original pages, in the order they were locked */
+ if (i > 0) {
+ for_each_page(page, size, p) {
+ FSB_BUG_ON(PageDirty(p)); /* XXX: racy? */
+ if (p != orig_page) {
+ unlock_page(p);
+ i--;
+ if (i == 0)
+ break;
+ }
+ } end_for_each_page;
+ }
+ return ret;
+}
+
+static int __try_to_free_blocks(struct page *page, int all_locked)
+{
+ unsigned int size;
+ struct fsblock *block;
+
+ FSB_BUG_ON(!PageLocked(page));
+ FSB_BUG_ON(!PageBlocks(page));
+
+ if (PageDirty(page) || PageWriteback(page))
+ return 0;
+
+ block = page_blocks(page);
+ if (fsblock_superpage(block))
+ return try_to_free_blocks_super(page, all_locked);
+
+ assert_block(block);
+ if (fsblock_subpage(block)) {
+ struct fsblock *b;
+
+ for_each_block(block, b) {
+ if (!can_free_block(b))
+ return 0;
+ }
+
+ for_each_block(block, b) {
+ /*
+ * must decrement head block last, so that if the
+ * find_get_page_block fails, then the blocks will
+ * really be freed.
+ */
+ if (b == block)
+ continue;
+ if (!try_to_free_block(b))
+ goto error;
+ }
+ if (!try_to_free_block(block))
+ goto error;
+
+ size = fsblock_size(block);
+ FSB_BUG_ON(block != page_blocks(page));
+ goto success;
+error:
+ fo