[PATCH 31/51] [GFS2] fix inode meta data corruption

Previous thread: [GFS2/DLM] Pre-pull patch posting by swhiteho on Thursday, October 4, 2007 - 1:48 am. (32 messages)

Next thread: none
From: swhiteho
Date: Thursday, October 4, 2007 - 1:49 am

From: Wendy Cheng <wcheng@redhat.com>

Fix a nasty inode meta data corruption issue by keeping the buffer head in
icache array. This buffer needs to stay in memory until journal flush occurs
Otherwise, gfs2_meta_inode_buffer could do a disk read before the inode hits
disk. It ends up with meta data corruptions. The buffer will be released as
part of the existing journal flush logic.

Signed-off-by: S. Wendy Cheng <wcheng@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 34f7bcd..013f00b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -244,6 +244,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	return 0;
 }
 
+static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh)
+{
+	ip->i_cache[0] = bh;
+}
+
 /**
  * gfs2_inode_refresh - Refresh the incore copy of the dinode
  * @ip: The GFS2 inode
@@ -688,7 +693,7 @@ out:
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 			const struct gfs2_inum_host *inum, unsigned int mode,
 			unsigned int uid, unsigned int gid,
-			const u64 *generation, dev_t dev)
+			const u64 *generation, dev_t dev, struct buffer_head **bhp)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_dinode *di;
@@ -743,13 +748,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
 	di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
 	memset(&di->di_reserved, 0, sizeof(di->di_reserved));
+	
+	set_buffer_uptodate(dibh);
 
-	brelse(dibh);
+	*bhp = dibh;
 }
 
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 		       unsigned int mode, const struct gfs2_inum_host *inum,
-		       const u64 *generation, dev_t dev)
+		       const u64 *generation, dev_t dev, struct buffer_head **bhp)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	unsigned int uid, gid;
@@ -770,7 +777,7 @@ static int make_dinode(struct gfs2_inode ...
From: swhiteho
Date: Thursday, October 4, 2007 - 1:49 am

From: Steven Whitehouse <swhiteho@redhat.com>

This patch corrects the lock ordering in unlink to be the same as
that in the rest of GFS2, i.e. parent -> child -> rgrp.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 5b8b994..2cbe5a3 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -278,17 +278,25 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 	gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
 
 
-	error = gfs2_glock_nq_m(3, ghs);
+	error = gfs2_glock_nq(ghs); /* parent */
 	if (error)
-		goto out;
+		goto out_parent;
+
+	error = gfs2_glock_nq(ghs + 1); /* child */
+	if (error)
+		goto out_child;
+
+	error = gfs2_glock_nq(ghs + 2); /* rgrp */
+	if (error)
+		goto out_rgrp;
 
 	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
 	if (error)
-		goto out_gunlock;
+		goto out_rgrp;
 
 	error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
 	if (error)
-		goto out_gunlock;
+		goto out_rgrp;
 
 	error = gfs2_dir_del(dip, &dentry->d_name);
         if (error)
@@ -298,12 +306,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 
 out_end_trans:
 	gfs2_trans_end(sdp);
-out_gunlock:
-	gfs2_glock_dq_m(3, ghs);
-out:
-	gfs2_holder_uninit(ghs);
-	gfs2_holder_uninit(ghs + 1);
+	gfs2_glock_dq(ghs + 2);
+out_rgrp:
 	gfs2_holder_uninit(ghs + 2);
+	gfs2_glock_dq(ghs + 1);
+out_child:
+	gfs2_holder_uninit(ghs + 1);
+	gfs2_glock_dq(ghs);
+out_parent:
+	gfs2_holder_uninit(ghs);
 	gfs2_glock_dq_uninit(&ri_gh);
 	return error;
 }
-- 
1.5.1.2

-

From: swhiteho
Date: Thursday, October 4, 2007 - 1:49 am

From: Steven Whitehouse <swhiteho@redhat.com>

This collects together the operations required to remove a gfs2_bufdata
from the ail lists. Its only called from two places to start with, but
expect to see more of this function in future.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 7ef6b23..b17346a 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -60,11 +60,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 		blkno = bh->b_blocknr;
 		gfs2_assert_withdraw(sdp, !buffer_busy(bh));
 
-		bd->bd_ail = NULL;
-		list_del(&bd->bd_ail_st_list);
-		list_del(&bd->bd_ail_gl_list);
-		atomic_dec(&gl->gl_ail_count);
-		brelse(bh);
+		gfs2_remove_from_ail(NULL, bd);
 		gfs2_log_unlock(sdp);
 
 		gfs2_trans_add_revoke(sdp, blkno);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index d0e6b42..d8232ec 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -60,6 +60,26 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
 }
 
 /**
+ * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
+ * @mapping: The associated mapping (maybe NULL)
+ * @bd: The gfs2_bufdata to remove
+ *
+ * The log lock _must_ be held when calling this function
+ *
+ */
+
+void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd)
+{
+	bd->bd_ail = NULL;
+	list_del(&bd->bd_ail_st_list);
+	list_del(&bd->bd_ail_gl_list);
+	atomic_dec(&bd->bd_gl->gl_ail_count);
+	if (mapping)
+		gfs2_meta_cache_flush(GFS2_I(mapping->host));
+	brelse(bd->bd_bh);
+}
+
+/**
  * gfs2_ail1_start_one - Start I/O on a part of the AIL
  * @sdp: the filesystem
  * @tr: the part of the AIL
@@ -219,21 +239,12 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 {
 	struct list_head *head = &ai->ai_ail2_list;
 	struct gfs2_bufdata *bd;
-	struct gfs2_inode *bh_ip;
 
 	while (!list_empty(head)) {
 		bd = list_entry(head->prev, struct gfs2_bufdata,
 ...
From: swhiteho
Date: Thursday, October 4, 2007 - 1:49 am

From: Steven Whitehouse <swhiteho@redhat.com>

Journaled data is marked dirty by gfs2_unpin and should not be marked
dirty here.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 9b89904..1e56f4d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -95,7 +95,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 	set_buffer_uptodate(bh);
 	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
-	mark_buffer_dirty(bh);
+	if (!gfs2_is_jdata(ip))
+		mark_buffer_dirty(bh);
 
 	if (release) {
 		unlock_page(page);
-- 
1.5.1.2

-

From: swhiteho
Date: Thursday, October 4, 2007 - 1:49 am

From: Steven Whitehouse <swhiteho@redhat.com>

gfs2_pin and gfs2_unpin are only used in lops.c, despite being
defined in meta_io.c, so this patch moves them into lops.c and
makes them static. At the same time, its possible to clean up
the locking in the buf and databuf _lo_add() functions so that
we only need to grab the spinlock once. Also we have to move
lock_buffer() around the _lo_add() functions since we can't
do that in gfs2_pin() any more since we hold the spinlock
for the duration of that function.

As a result, the code shrinks by 12 lines and we do far fewer
operations when adding buffers to the log. It also makes the
code somewhat easier to read & understand.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 7ef3356..3ec5871 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,7 +27,71 @@
 #include "trans.h"
 #include "util.h"
 
-static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+/**
+ * gfs2_pin - Pin a buffer in memory
+ * @sdp: The superblock
+ * @bh: The buffer to be pinned
+ *
+ * The log lock must be held when calling this function
+ */
+static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+	struct gfs2_bufdata *bd;
+
+	gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
+
+	clear_buffer_dirty(bh);
+	if (test_set_buffer_pinned(bh))
+		gfs2_assert_withdraw(sdp, 0);
+	if (!buffer_uptodate(bh))
+		gfs2_io_error_bh(sdp, bh);
+	bd = bh->b_private;
+	/* If this buffer is in the AIL and it has already been written
+	 * to in-place disk block, remove it from the AIL.
+	 */
+	if (bd->bd_ail)
+		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
+	get_bh(bh);
+}
+
+/**
+ * gfs2_unpin - Unpin a buffer
+ * @sdp: the filesystem the buffer belongs to
+ * @bh: The buffer to unpin
+ * @ai:
+ *
+ */
+
+static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
+		       struct gfs2_ail *ai)
+{
+	struct ...
Previous thread: [GFS2/DLM] Pre-pull patch posting by swhiteho on Thursday, October 4, 2007 - 1:48 am. (32 messages)

Next thread: none