[PATCH 8/9] Unionfs: fold do_readpage into unionfs_readpage

Previous thread: [ANNOUNCE] Ceph distributed file system by Sage Weil on Monday, November 12, 2007 - 6:51 pm. (3 messages)

Next thread: [ANNOUNCE] util-linux-ng 2.13.1-rc1 by Karel Zak on Tuesday, November 13, 2007 - 4:24 am. (1 message)
From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

The following is a series of patches related to Unionfs.  The main changes
here are bug fixes and improved cache-coherency methods.

These patches were tested (where appropriate) on Linus's 2.6.24 latest code
(as of v2.6.24-rc2-330-g325d22d), for the first time on MM
(mmotm-2007-11-10-19-05), as well as the backports to
2.6.{23,22,21,20,19,18,9} on ext2/3/4, xfs, reiserfs, nfs2/3/4, jffs2,
ramfs, tmpfs, cramfs, and squashfs (where available).  See
http://unionfs.filesystems.org/ to download backported unionfs code.

Please pull from the 'master' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git

to receive the following:

Erez Zadok (9):
      Unionfs: flush and release updates
      Unionfs: use i_size wrappers
      Unionfs: update cache-coherency detection heuristics
      Unionfs: writepage updates
      Unionfs: clear partial read in readpage
      Unionfs: debugging updates
      Unionfs: remove unnecessary lower atime updates
      Unionfs: fold do_readpage into unionfs_readpage
      Unionfs: move debugging checks inside locks

 Documentation/filesystems/unionfs/concepts.txt |   18 ++++++
 fs/unionfs/commonfops.c                        |   47 +++++++-----------
 fs/unionfs/debug.c                             |    2 
 fs/unionfs/dentry.c                            |   31 +++++++----
 fs/unionfs/file.c                              |   18 +-----
 fs/unionfs/inode.c                             |   29 +++++------
 fs/unionfs/mmap.c                              |   65 ++++++++-----------------
 fs/unionfs/rdstate.c                           |    4 -
 fs/unionfs/rename.c                            |    4 -
 fs/unionfs/super.c                             |    6 +-
 fs/unionfs/union.h                             |    5 +
 fs/unionfs/xattr.c                             |    8 +--
 12 files changed, 115 insertions(+), 122 deletions(-)

---
Erez Zadok
ezk@cs.sunysb.edu
-

From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/commonfops.c |   10 +++++-----
 fs/unionfs/inode.c      |    4 ++--
 fs/unionfs/rdstate.c    |    4 ++--
 fs/unionfs/rename.c     |    4 ++--
 fs/unionfs/super.c      |    2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index ba84529..624f920 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -72,7 +72,8 @@ retry:
 	dput(tmp_dentry);
 
 	err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
-				bindex, file->f_path.dentry->d_inode->i_size);
+				bindex,
+				i_size_read(file->f_path.dentry->d_inode));
 	if (err) {
 		if (unlikely(err == -EEXIST))
 			goto retry;
@@ -199,7 +200,6 @@ static int open_highest_file(struct file *file, bool willwrite)
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *parent_inode = dentry->d_parent->d_inode;
 	struct super_block *sb = dentry->d_sb;
-	size_t inode_size = dentry->d_inode->i_size;
 
 	bstart = dbstart(dentry);
 	bend = dbend(dentry);
@@ -208,7 +208,7 @@ static int open_highest_file(struct file *file, bool willwrite)
 	if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
 		for (bindex = bstart - 1; bindex >= 0; bindex--) {
 			err = copyup_file(parent_inode, file, bstart, bindex,
-					  inode_size);
+					  i_size_read(dentry->d_inode));
 			if (!err)
 				break;
 		}
@@ -243,7 +243,6 @@ static int do_delayed_copyup(struct file *file)
 	int bindex, bstart, bend, err = 0;
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *parent_inode = dentry->d_parent->d_inode;
-	loff_t inode_size = dentry->d_inode->i_size;
 
 	bstart = fbstart(file);
 	bend = fbend(file);
@@ -255,7 +254,8 @@ static int do_delayed_copyup(struct file *file)
 	for (bindex = bstart - 1; bindex >= 0; bindex--) {
 		if (!d_deleted(dentry))
 			err = copyup_file(parent_inode, file, bstart,
-					  bindex, inode_size);
+					  bindex,
+					  ...
From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

Remove the totalopens counter which was intended to reduce unnecessary
processing of d_deleted dentries.  Move that processing from file_release to
flush.

Cc: Hugh Dickins <hugh@veritas.com>

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/commonfops.c |   30 +++++++++++-------------------
 fs/unionfs/union.h      |    2 --
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 50e5775..ba84529 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -551,9 +551,6 @@ int unionfs_open(struct inode *inode, struct file *file)
 	bstart = fbstart(file) = dbstart(dentry);
 	bend = fbend(file) = dbend(dentry);
 
-	/* increment, so that we can flush appropriately */
-	atomic_inc(&UNIONFS_I(dentry->d_inode)->totalopens);
-
 	/*
 	 * open all directories and make the unionfs file struct point to
 	 * these lower file structs
@@ -565,7 +562,6 @@ int unionfs_open(struct inode *inode, struct file *file)
 
 	/* freeing the allocated resources, and fput the opened files */
 	if (err) {
-		atomic_dec(&UNIONFS_I(dentry->d_inode)->totalopens);
 		for (bindex = bstart; bindex <= bend; bindex++) {
 			lower_file = unionfs_lower_file_idx(file, bindex);
 			if (!lower_file)
@@ -606,6 +602,7 @@ int unionfs_file_release(struct inode *inode, struct file *file)
 	struct unionfs_file_info *fileinfo;
 	struct unionfs_inode_info *inodeinfo;
 	struct super_block *sb = inode->i_sb;
+	struct dentry *dentry = file->f_path.dentry;
 	int bindex, bstart, bend;
 	int fgen, err = 0;
 
@@ -628,6 +625,7 @@ int unionfs_file_release(struct inode *inode, struct file *file)
 	bstart = fbstart(file);
 	bend = fbend(file);
 
+	unionfs_lock_dentry(dentry);
 	for (bindex = bstart; bindex <= bend; bindex++) {
 		lower_file = unionfs_lower_file_idx(file, bindex);
 
@@ -635,7 +633,15 @@ int unionfs_file_release(struct inode *inode, struct file *file)
 			fput(lower_file);
 			branchput(sb, bindex);
 		}
+
+		/* ...
From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

Use a small delay to reduce the number of times unionfs has to detect
changed mtime's/ctime's, and also reduce the potential for false positives.
See Documentation/filesystems/unionfs/concepts.txt for a detailed
discussion.

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 Documentation/filesystems/unionfs/concepts.txt |   18 ++++++++++++++
 fs/unionfs/dentry.c                            |   29 ++++++++++++++---------
 fs/unionfs/union.h                             |    3 ++
 3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt
index 37a62d8..7654ccc 100644
--- a/Documentation/filesystems/unionfs/concepts.txt
+++ b/Documentation/filesystems/unionfs/concepts.txt
@@ -177,5 +177,23 @@ would be really nice if the VFS would export an optional file system hook
 ->file_revalidate (similarly to dentry->d_revalidate) that will be called
 before each VFS op that has a "struct file" in it.
 
+Certain file systems have micro-second granularity (or better) for inode
+times, and asynchronous actions could cause those times to change with some
+small delay.  In such cases, Unionfs may see a changed inode time that only
+differs by a tiny fraction of a second: such a change may be a false
+positive indication that the lower object has changed, whereas if unionfs
+waits a little longer, that false indication will not be seen.  (These false
+positives are harmless, because they would at most cause unionfs to
+re-validate an object that may need no revalidation, and print a debugging
+message that clutters the console/logs.)  Therefore, to minimize the chances
+of these situations, we delay the detection of changed times by a small
+factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3
+seconds, as does NFS).  This means that we will detect the change, only a
+couple of seconds later, if indeed the time change persists in the lower
+file object.  This delayed detection ...
From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

Simplify the code and reduce stack pressure a bit.

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/mmap.c |   44 ++++++++++++++------------------------------
 1 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
index 34fd8aa..ef8822f 100644
--- a/fs/unionfs/mmap.c
+++ b/fs/unionfs/mmap.c
@@ -136,22 +136,22 @@ out:
 	return err;
 }
 
-/*
- * readpage is called from generic_page_read and the fault handler.
- * If your file system uses generic_page_read for the read op, it
- * must implement readpage.
- *
- * Readpage expects a locked page, and must unlock it.
- */
-static int unionfs_do_readpage(struct file *file, struct page *page)
+/* Readpage expects a locked page, and must unlock it */
+static int unionfs_readpage(struct file *file, struct page *page)
 {
-	int err = -EIO;
+	int err;
 	struct file *lower_file;
 	struct inode *inode;
 	mm_segment_t old_fs;
 	char *page_data = NULL;
 	loff_t offset;
 
+	unionfs_read_lock(file->f_path.dentry->d_sb);
+	err = unionfs_file_revalidate(file, false);
+	if (unlikely(err))
+		goto out;
+	unionfs_check_file(file);
+
 	if (!UNIONFS_F(file)) {
 		err = -ENOENT;
 		goto out;
@@ -191,33 +191,17 @@ static int unionfs_do_readpage(struct file *file, struct page *page)
 
 	flush_dcache_page(page);
 
-out:
-	if (err == 0)
-		SetPageUptodate(page);
-	else
-		ClearPageUptodate(page);
-
-	return err;
-}
-
-static int unionfs_readpage(struct file *file, struct page *page)
-{
-	int err;
-
-	unionfs_read_lock(file->f_path.dentry->d_sb);
-	err = unionfs_file_revalidate(file, false);
-	if (unlikely(err))
-		goto out;
-	unionfs_check_file(file);
-
-	err = unionfs_do_readpage(file, page);
-
 	/*
 	 * we have to unlock our page, b/c we _might_ have gotten a locked
 	 * page.  but we no longer have to wakeup on our page here, b/c
 	 * UnlockPage does it
 	 */
 out:
+	if (err == 0)
+		SetPageUptodate(page);
+	else
+		ClearPageUptodate(page);
+
 ...
From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

No need for this because our readpage calls vfs_read on the lower objects,
which would update the atime as/if needed.

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/file.c |    8 --------
 fs/unionfs/mmap.c |    6 ------
 2 files changed, 0 insertions(+), 14 deletions(-)

diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c
index 126df5e..809e0f1 100644
--- a/fs/unionfs/file.c
+++ b/fs/unionfs/file.c
@@ -31,10 +31,6 @@ static ssize_t unionfs_read(struct file *file, char __user *buf,
 
 	err = do_sync_read(file, buf, count, ppos);
 
-	if (err >= 0)
-		touch_atime(unionfs_lower_mnt(file->f_path.dentry),
-			    unionfs_lower_dentry(file->f_path.dentry));
-
 out:
 	unionfs_read_unlock(file->f_path.dentry->d_sb);
 	unionfs_check_file(file);
@@ -58,10 +54,6 @@ static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	if (err == -EIOCBQUEUED)
 		err = wait_on_sync_kiocb(iocb);
 
-	if (err >= 0)
-		touch_atime(unionfs_lower_mnt(file->f_path.dentry),
-			    unionfs_lower_dentry(file->f_path.dentry));
-
 out:
 	unionfs_read_unlock(file->f_path.dentry->d_sb);
 	unionfs_check_file(file);
diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
index bb00fd5..34fd8aa 100644
--- a/fs/unionfs/mmap.c
+++ b/fs/unionfs/mmap.c
@@ -212,12 +212,6 @@ static int unionfs_readpage(struct file *file, struct page *page)
 
 	err = unionfs_do_readpage(file, page);
 
-	if (!err) {
-		touch_atime(unionfs_lower_mnt(file->f_path.dentry),
-			    unionfs_lower_dentry(file->f_path.dentry));
-		unionfs_copy_attr_times(file->f_path.dentry->d_inode);
-	}
-
 	/*
 	 * we have to unlock our page, b/c we _might_ have gotten a locked
 	 * page.  but we no longer have to wakeup on our page here, b/c
-- 
1.5.2.2

-

From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

This is to ensure that the objects we want to check aren't being destroyed
or changed by another thread.

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/commonfops.c |    7 ++++---
 fs/unionfs/dentry.c     |    2 +-
 fs/unionfs/file.c       |   10 +++++-----
 fs/unionfs/inode.c      |   25 +++++++++++++------------
 fs/unionfs/mmap.c       |    2 +-
 fs/unionfs/super.c      |    4 ++--
 fs/unionfs/xattr.c      |    8 ++++----
 7 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 624f920..b33917e 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -573,6 +573,7 @@ int unionfs_open(struct inode *inode, struct file *file)
 		}
 	}
 
+	/* XXX: should this unlock be moved to the function bottom? */
 	unionfs_unlock_dentry(dentry);
 
 out:
@@ -582,12 +583,12 @@ out:
 		kfree(UNIONFS_F(file));
 	}
 out_nofree:
-	unionfs_read_unlock(inode->i_sb);
 	unionfs_check_inode(inode);
 	if (!err) {
 		unionfs_check_file(file);
 		unionfs_check_dentry(file->f_path.dentry->d_parent);
 	}
+	unionfs_read_unlock(inode->i_sb);
 	return err;
 }
 
@@ -786,8 +787,8 @@ long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 
 out:
-	unionfs_read_unlock(file->f_path.dentry->d_sb);
 	unionfs_check_file(file);
+	unionfs_read_unlock(file->f_path.dentry->d_sb);
 	return err;
 }
 
@@ -825,7 +826,7 @@ int unionfs_flush(struct file *file, fl_owner_t id)
 	unionfs_copy_attr_times(dentry->d_parent->d_inode);
 
 out:
-	unionfs_read_unlock(dentry->d_sb);
 	unionfs_check_file(file);
+	unionfs_read_unlock(dentry->d_sb);
 	return err;
 }
diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index aed4d39..edea1a4 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -433,11 +433,11 @@ static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	unionfs_lock_dentry(dentry);
 	err = __unionfs_d_revalidate_chain(dentry, nd, ...
From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/mmap.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
index 468dc61..bb00fd5 100644
--- a/fs/unionfs/mmap.c
+++ b/fs/unionfs/mmap.c
@@ -178,7 +178,8 @@ static int unionfs_do_readpage(struct file *file, struct page *page)
 	err = vfs_read(lower_file, page_data, PAGE_CACHE_SIZE,
 		       &lower_file->f_pos);
 	set_fs(old_fs);
-
+	if (err >= 0 && err < PAGE_CACHE_SIZE)
+		memset(page_data + err, 0, PAGE_CACHE_SIZE - err);
 	kunmap(page);
 
 	if (err < 0)
-- 
1.5.2.2

-

From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

Don't set/reset the PageUptodate flag on our page.  Call flush_dcache_page
on the lower page after copy_highpage, and set it uptodate.  Call
set_page_dirty right before clear_page_dirty_for_io.

CC: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/mmap.c |   10 ++++++----
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c
index 4b00b98..468dc61 100644
--- a/fs/unionfs/mmap.c
+++ b/fs/unionfs/mmap.c
@@ -28,6 +28,7 @@ static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
 	struct address_space *lower_mapping; /* lower inode mapping */
 	gfp_t mask;
 
+	BUG_ON(!PageUptodate(page));
 	inode = page->mapping->host;
 	lower_inode = unionfs_lower_inode(inode);
 	lower_mapping = lower_inode->i_mapping;
@@ -53,6 +54,8 @@ static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
 
 	/* copy page data from our upper page to the lower page */
 	copy_highpage(lower_page, page);
+	flush_dcache_page(lower_page);
+	SetPageUptodate(lower_page);
 
 	/*
 	 * Call lower writepage (expects locked page).  However, if we are
@@ -68,12 +71,11 @@ static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
 		goto out_release;
 	}
 	BUG_ON(!lower_mapping->a_ops->writepage);
+	set_page_dirty(lower_page);
 	clear_page_dirty_for_io(lower_page); /* emulate VFS behavior */
 	err = lower_mapping->a_ops->writepage(lower_page, wbc);
-	if (err < 0) {
-		ClearPageUptodate(page);
+	if (err < 0)
 		goto out_release;
-	}
 
 	/*
 	 * Lower file systems such as ramfs and tmpfs, may return
@@ -97,7 +99,7 @@ static int unionfs_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	/* all is well */
-	SetPageUptodate(page);
+
 	/* lower mtimes have changed: update ours */
 	unionfs_copy_attr_times(inode);
 
-- 
1.5.2.2

-

From: Erez Zadok
Date: Tuesday, November 13, 2007 - 3:10 am

Don't perform dentry+inode checks unless both are valid.

Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
---
 fs/unionfs/debug.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
index 0066ccd..8464fbb 100644
--- a/fs/unionfs/debug.c
+++ b/fs/unionfs/debug.c
@@ -299,7 +299,7 @@ check_inode:
 	 * be NULL.  But, check that all three are NULL: lower dentry, mnt,
 	 * and inode.
 	 */
-	if (S_ISDIR(inode->i_mode))
+	if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode))
 		for (bindex = dstart+1; bindex < dend; bindex++) {
 			lower_inode = unionfs_lower_inode_idx(inode, bindex);
 			lower_dentry = unionfs_lower_dentry_idx(dentry,
-- 
1.5.2.2

-

Previous thread: [ANNOUNCE] Ceph distributed file system by Sage Weil on Monday, November 12, 2007 - 6:51 pm. (3 messages)

Next thread: [ANNOUNCE] util-linux-ng 2.13.1-rc1 by Karel Zak on Tuesday, November 13, 2007 - 4:24 am. (1 message)