fs: introduce mnt_clone_write

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Linux Kernel Mailing List
Date: Thursday, June 11, 2009 - 8:59 pm

Gitweb:     http://git.kernel.org/linus/96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef
Commit:     96029c4e09ccbd73a6d0ed2b29e80bf2586ad7ef
Parent:     d3ef3d7351ccfbef3e5d926efc5ee332136f40d4
Author:     npiggin@suse.de <npiggin@suse.de>
AuthorDate: Sun Apr 26 20:25:55 2009 +1000
Committer:  Al Viro <viro@zeniv.linux.org.uk>
CommitDate: Thu Jun 11 21:36:02 2009 -0400

    fs: introduce mnt_clone_write
    
    This patch speeds up lmbench lat_mmap test by about another 2% after the
    first patch.
    
    Before:
     avg = 462.286
     std = 5.46106
    
    After:
     avg = 453.12
     std = 9.58257
    
    (50 runs of each, stddev gives a reasonable confidence)
    
    It does this by introducing mnt_clone_write, which avoids some heavyweight
    operations of mnt_want_write if called on a vfsmount which we know already
    has a write count; and mnt_want_write_file, which can call mnt_clone_write
    if the file is open for write.
    
    After these two patches, mnt_want_write and mnt_drop_write go from 7% on
    the profile down to 1.3% (including mnt_clone_write).
    
    [AV: mnt_want_write_file() should take the file alone and derive mnt from
    it; not only do all callers have that form, but that's the only mnt which
    we know is already held for write if the file is opened for write]
    
    Cc: Dave Hansen <haveblue@us.ibm.com>
    Signed-off-by: Nick Piggin <npiggin@suse.de>
    Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c       |    2 +-
 fs/inode.c            |    2 +-
 fs/namespace.c        |   40 ++++++++++++++++++++++++++++++++++++++++
 fs/open.c             |    4 ++--
 fs/xattr.c            |    4 ++--
 include/linux/mount.h |    4 ++++
 6 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index 54018fe..3d66dbc 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
 	 */
 	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
 		file_take_write(file);
-		error = mnt_want_write(mnt);
+		error = mnt_clone_write(mnt);
 		WARN_ON(error);
 	}
 	return error;
diff --git a/fs/inode.c b/fs/inode.c
index ca33701..a88baeb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1422,7 +1422,7 @@ void file_update_time(struct file *file)
 	if (IS_NOCMTIME(inode))
 		return;
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		return;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 22ae06a..120b8a6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -265,6 +265,46 @@ out:
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
 /**
+ * mnt_clone_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * Like mnt_want_write, but it must only be used to take an
+ * extra write reference on a mount that we already know has a
+ * write reference on it. This allows some optimisation. Returns
+ * 0 on success, or -EROFS if __mnt_is_readonly() says read-only.
+ *
+ * When finished, mnt_drop_write must be called as usual to
+ * drop the reference.
+ */
+int mnt_clone_write(struct vfsmount *mnt)
+{
+	/* superblock may be r/o */
+	if (__mnt_is_readonly(mnt))
+		return -EROFS;
+	preempt_disable();
+	inc_mnt_writers(mnt);
+	preempt_enable();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mnt_clone_write);
+
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file whose mount to take a write on
+ *
+ * This is like mnt_want_write, but it takes a file and can use
+ * mnt_clone_write instead if the file is already open for write
+ */
+int mnt_want_write_file(struct file *file)
+{
+	if (!(file->f_mode & FMODE_WRITE))
+		return mnt_want_write(file->f_path.mnt);
+	else
+		return mnt_clone_write(file->f_path.mnt);
+}
+EXPORT_SYMBOL_GPL(mnt_want_write_file);
+
+/**
  * mnt_drop_write - give up write access to a mount
  * @mnt: the mount on which to give up write access
  *
diff --git a/fs/open.c b/fs/open.c
index bdfbf03..7200e23 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
 
 	audit_inode(NULL, dentry);
 
-	err = mnt_want_write(file->f_path.mnt);
+	err = mnt_want_write_file(file);
 	if (err)
 		goto out_putf;
 	mutex_lock(&inode->i_mutex);
@@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 	if (!file)
 		goto out;
 
-	error = mnt_want_write(file->f_path.mnt);
+	error = mnt_want_write_file(file);
 	if (error)
 		goto out_fput;
 	dentry = file->f_path.dentry;
diff --git a/fs/xattr.c b/fs/xattr.c
index d51b8f9..1c3d0af 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = setxattr(dentry, name, value, size, flags);
 		mnt_drop_write(f->f_path.mnt);
@@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
 		return error;
 	dentry = f->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write(f->f_path.mnt);
+	error = mnt_want_write_file(f);
 	if (!error) {
 		error = removexattr(dentry, name);
 		mnt_drop_write(f->f_path.mnt);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index ac49c1f..5d52753 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -88,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
 	return mnt;
 }
 
+struct file; /* forward declaration */
+
 extern int mnt_want_write(struct vfsmount *mnt);
+extern int mnt_want_write_file(struct file *file);
+extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mntput_no_expire(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
--
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
fs: introduce mnt_clone_write, Linux Kernel Mailing ..., (Thu Jun 11, 8:59 pm)