[patch 6/8] fs: fsync optimisations

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Nick Piggin
Date: Friday, December 17, 2010 - 6:46 pm

Optimise fsync by adding a datasync parameter to sync_inode_metadata to
DTRT with writing back the inode (->write_inode in theory should have a
datasync parameter too perhaps, but that's for another time).

Also, implement the metadata sync optimally rather than reusing the
normal data writeback path. This means less useless moving the inode around the
writeback lists, and less dropping and retaking of inode_lock, and avoiding
the data writeback call with nr_pages == 0.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>

Index: linux-2.6/drivers/staging/pohmelfs/inode.c
===================================================================
--- linux-2.6.orig/drivers/staging/pohmelfs/inode.c	2010-12-16 18:29:02.000000000 +1100
+++ linux-2.6/drivers/staging/pohmelfs/inode.c	2010-12-16 18:33:29.000000000 +1100
@@ -883,7 +883,7 @@ static int pohmelfs_fsync(struct file *f
 {
 	struct inode *inode = file->f_mapping->host;
 
-	return sync_inode_metadata(inode, 1);
+	return sync_inode_metadata(inode, datasync, 1);
 }
 
 ssize_t pohmelfs_write(struct file *file, const char __user *buf,
Index: linux-2.6/fs/exofs/file.c
===================================================================
--- linux-2.6.orig/fs/exofs/file.c	2010-12-16 18:33:16.000000000 +1100
+++ linux-2.6/fs/exofs/file.c	2010-12-16 18:33:29.000000000 +1100
@@ -48,7 +48,7 @@ static int exofs_file_fsync(struct file
 	struct inode *inode = filp->f_mapping->host;
 	struct super_block *sb;
 
-	ret = sync_inode_metadata(inode, 1);
+	ret = sync_inode_metadata(inode, datasync, 1);
 
 	/* This is a good place to write the sb */
 	/* TODO: Sechedule an sb-sync on create */
Index: linux-2.6/fs/ext2/dir.c
===================================================================
--- linux-2.6.orig/fs/ext2/dir.c	2010-12-11 20:57:04.000000000 +1100
+++ linux-2.6/fs/ext2/dir.c	2010-12-16 18:33:29.000000000 +1100
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page
 	if (IS_DIRSYNC(dir)) {
 		err = write_one_page(page, 1);
 		if (!err)
-			err = sync_inode_metadata(dir, 1);
+			err = sync_inode_metadata(dir, 0, 1);
 	} else {
 		unlock_page(page);
 	}
Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c	2010-12-16 18:33:25.000000000 +1100
+++ linux-2.6/fs/ext2/inode.c	2010-12-16 18:33:29.000000000 +1100
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *in
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	if (inode_needs_sync(inode)) {
 		sync_mapping_buffers(inode->i_mapping);
-		sync_inode_metadata(inode, 1);
+		sync_inode_metadata(inode, 0, 1);
 	} else {
 		mark_inode_dirty(inode);
 	}
Index: linux-2.6/fs/ext2/xattr.c
===================================================================
--- linux-2.6.orig/fs/ext2/xattr.c	2010-12-11 20:57:04.000000000 +1100
+++ linux-2.6/fs/ext2/xattr.c	2010-12-16 18:33:29.000000000 +1100
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, str
 	EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
 	inode->i_ctime = CURRENT_TIME_SEC;
 	if (IS_SYNC(inode)) {
-		error = sync_inode_metadata(inode, 1);
+		error = sync_inode_metadata(inode, 0, 1);
 		/* In case sync failed due to ENOSPC the inode was actually
 		 * written (only some dirty data were not) so we just proceed
 		 * as if nothing happened and cleanup the unused block */
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c	2010-12-16 18:33:22.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c	2010-12-16 18:33:29.000000000 +1100
@@ -1307,7 +1307,7 @@ int sync_inode(struct inode *inode, stru
 EXPORT_SYMBOL(sync_inode);
 
 /**
- * sync_inode - write an inode to disk
+ * sync_inode_metadata - write an inode to disk
  * @inode: the inode to sync
  * @wait: wait for I/O to complete.
  *
@@ -1315,13 +1315,50 @@ EXPORT_SYMBOL(sync_inode);
  *
  * Note: only writes the actual inode, no associated data or other metadata.
  */
-int sync_inode_metadata(struct inode *inode, int wait)
+int sync_inode_metadata(struct inode *inode, int datasync, int wait)
 {
+	struct address_space *mapping = inode->i_mapping;
 	struct writeback_control wbc = {
 		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
 		.nr_to_write = 0, /* metadata-only */
 	};
+	unsigned dirty, mask;
+	int ret = 0;
 
-	return sync_inode(inode, &wbc);
+	/*
+	 * This is a similar implementation to writeback_single_inode.
+	 * Keep them in sync.
+	 */
+	spin_lock(&inode_lock);
+	if (!inode_writeback_begin(inode, wait))
+		goto out;
+
+	if (datasync)
+		mask = I_DIRTY_DATASYNC;
+	else
+		mask = I_DIRTY_SYNC | I_DIRTY_DATASYNC;
+	dirty = inode->i_state & mask;
+	if (!dirty)
+		goto out_wb_end;
+	/*
+	 * Generic write_inode doesn't distinguish between sync and datasync,
+	 * so even a datasync can clear the sync state. Filesystems which
+	 * distiguish these cases must only clear 'mask' in their metadata
+	 * sync code.
+	 */
+	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+
+	spin_unlock(&inode_lock);
+	ret = write_inode(inode, &wbc);
+	spin_lock(&inode_lock);
+	if (ret)
+		inode->i_state |= dirty; /* couldn't write out inode */
+
+out_wb_end:
+	inode_writeback_end(inode);
+
+out:
+	spin_unlock(&inode_lock);
+	return ret;
 }
 EXPORT_SYMBOL(sync_inode_metadata);
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c	2010-12-16 18:33:16.000000000 +1100
+++ linux-2.6/fs/libfs.c	2010-12-16 18:33:29.000000000 +1100
@@ -895,7 +895,7 @@ int generic_file_fsync(struct file *file
 	int ret;
 
 	ret = sync_mapping_buffers(inode->i_mapping);
-	err = sync_inode_metadata(inode, 1);
+	err = sync_inode_metadata(inode, datasync, 1);
 	if (ret == 0)
 		ret = err;
 	return ret;
Index: linux-2.6/fs/nfsd/vfs.c
===================================================================
--- linux-2.6.orig/fs/nfsd/vfs.c	2010-12-16 18:29:06.000000000 +1100
+++ linux-2.6/fs/nfsd/vfs.c	2010-12-16 18:33:29.000000000 +1100
@@ -287,7 +287,7 @@ commit_metadata(struct svc_fh *fhp)
 
 	if (export_ops->commit_metadata)
 		return export_ops->commit_metadata(inode);
-	return sync_inode_metadata(inode, 1);
+	return sync_inode_metadata(inode, 0, 1);
 }
 
 /*
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2010-12-16 18:33:17.000000000 +1100
+++ linux-2.6/include/linux/fs.h	2010-12-16 18:33:29.000000000 +1100
@@ -1765,7 +1765,7 @@ static inline void file_accessed(struct
 }
 
 int sync_inode(struct inode *inode, struct writeback_control *wbc);
-int sync_inode_metadata(struct inode *inode, int wait);
+int sync_inode_metadata(struct inode *inode, int datasync, int wait);
 int inode_writeback_begin(struct inode *inode, int wait);
 int inode_writeback_end(struct inode *inode);
 


--
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[patch 0/8] Inode data integrity patches, Nick Piggin, (Fri Dec 17, 6:46 pm)
[patch 1/8] fs: mark_inode_dirty barrier fix, Nick Piggin, (Fri Dec 17, 6:46 pm)
[patch 2/8] fs: simple fsync race fix, Nick Piggin, (Fri Dec 17, 6:46 pm)
[patch 3/8] fs: introduce inode writeback helpers, Nick Piggin, (Fri Dec 17, 6:46 pm)
[patch 5/8] fs: ext2 inode sync fix, Nick Piggin, (Fri Dec 17, 6:46 pm)
[patch 6/8] fs: fsync optimisations, Nick Piggin, (Fri Dec 17, 6:46 pm)
[patch 8/8] fs: add i_op-&gt;sync_inode, Nick Piggin, (Fri Dec 17, 6:46 pm)
Re: [patch 7/8] fs: fix or note I_DIRTY handling bugs in f ..., Christoph Hellwig, (Wed Dec 29, 8:01 am)
Re: [patch 8/8] fs: add i_op-&gt;sync_inode, Christoph Hellwig, (Wed Dec 29, 8:12 am)
Re: [patch 8/8] fs: add i_op-&gt;sync_inode, Nick Piggin, (Mon Jan 3, 11:27 pm)
Re: [patch 8/8] fs: add i_op-&gt;sync_inode, Christoph Hellwig, (Mon Jan 3, 11:57 pm)
Re: [patch 8/8] fs: add i_op-&gt;sync_inode, Nick Piggin, (Tue Jan 4, 1:03 am)
Re: [patch 8/8] fs: add i_op-&gt;sync_inode, Nick Piggin, (Tue Jan 4, 2:49 am)