From: Wendy Cheng <wcheng@redhat.com>
Fix a nasty inode meta data corruption issue by keeping the buffer head in
icache array. This buffer needs to stay in memory until journal flush occurs
Otherwise, gfs2_meta_inode_buffer could do a disk read before the inode hits
disk. It ends up with meta data corruptions. The buffer will be released as
part of the existing journal flush logic.
Signed-off-by: S. Wendy Cheng <wcheng@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 34f7bcd..013f00b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -244,6 +244,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
return 0;
}
+static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh)
+{
+ ip->i_cache[0] = bh;
+}
+
/**
* gfs2_inode_refresh - Refresh the incore copy of the dinode
* @ip: The GFS2 inode
@@ -688,7 +693,7 @@ out:
static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
const struct gfs2_inum_host *inum, unsigned int mode,
unsigned int uid, unsigned int gid,
- const u64 *generation, dev_t dev)
+ const u64 *generation, dev_t dev, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
@@ -743,13 +748,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
memset(&di->di_reserved, 0, sizeof(di->di_reserved));
+
+ set_buffer_uptodate(dibh);
- brelse(dibh);
+ *bhp = dibh;
}
static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
unsigned int mode, const struct gfs2_inum_host *inum,
- const u64 *generation, dev_t dev)
+ const u64 *generation, dev_t dev, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
unsigned int uid, gid;
@@ -770,7 +777,7 @@ static int make_dinode(struct gfs2_inode ...From: Steven Whitehouse <swhiteho@redhat.com>
This patch corrects the lock ordering in unlink to be the same as
that in the rest of GFS2, i.e. parent -> child -> rgrp.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 5b8b994..2cbe5a3 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -278,17 +278,25 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
- error = gfs2_glock_nq_m(3, ghs);
+ error = gfs2_glock_nq(ghs); /* parent */
if (error)
- goto out;
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
+
+ error = gfs2_glock_nq(ghs + 2); /* rgrp */
+ if (error)
+ goto out_rgrp;
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
- goto out_gunlock;
+ goto out_rgrp;
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
- goto out_gunlock;
+ goto out_rgrp;
error = gfs2_dir_del(dip, &dentry->d_name);
if (error)
@@ -298,12 +306,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
-out_gunlock:
- gfs2_glock_dq_m(3, ghs);
-out:
- gfs2_holder_uninit(ghs);
- gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs + 2);
+out_rgrp:
gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs);
+out_parent:
+ gfs2_holder_uninit(ghs);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
--
1.5.1.2
-
From: Steven Whitehouse <swhiteho@redhat.com>
This collects together the operations required to remove a gfs2_bufdata
from the ail lists. Its only called from two places to start with, but
expect to see more of this function in future.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 7ef6b23..b17346a 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -60,11 +60,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
blkno = bh->b_blocknr;
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
- bd->bd_ail = NULL;
- list_del(&bd->bd_ail_st_list);
- list_del(&bd->bd_ail_gl_list);
- atomic_dec(&gl->gl_ail_count);
- brelse(bh);
+ gfs2_remove_from_ail(NULL, bd);
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index d0e6b42..d8232ec 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -60,6 +60,26 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
}
/**
+ * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
+ * @mapping: The associated mapping (maybe NULL)
+ * @bd: The gfs2_bufdata to remove
+ *
+ * The log lock _must_ be held when calling this function
+ *
+ */
+
+void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd)
+{
+ bd->bd_ail = NULL;
+ list_del(&bd->bd_ail_st_list);
+ list_del(&bd->bd_ail_gl_list);
+ atomic_dec(&bd->bd_gl->gl_ail_count);
+ if (mapping)
+ gfs2_meta_cache_flush(GFS2_I(mapping->host));
+ brelse(bd->bd_bh);
+}
+
+/**
* gfs2_ail1_start_one - Start I/O on a part of the AIL
* @sdp: the filesystem
* @tr: the part of the AIL
@@ -219,21 +239,12 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
struct list_head *head = &ai->ai_ail2_list;
struct gfs2_bufdata *bd;
- struct gfs2_inode *bh_ip;
while (!list_empty(head)) {
bd = list_entry(head->prev, struct gfs2_bufdata,
...From: Steven Whitehouse <swhiteho@redhat.com>
Journaled data is marked dirty by gfs2_unpin and should not be marked
dirty here.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 9b89904..1e56f4d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -95,7 +95,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
set_buffer_uptodate(bh);
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
gfs2_trans_add_bh(ip->i_gl, bh, 0);
- mark_buffer_dirty(bh);
+ if (!gfs2_is_jdata(ip))
+ mark_buffer_dirty(bh);
if (release) {
unlock_page(page);
--
1.5.1.2
-
From: Steven Whitehouse <swhiteho@redhat.com>
gfs2_pin and gfs2_unpin are only used in lops.c, despite being
defined in meta_io.c, so this patch moves them into lops.c and
makes them static. At the same time, its possible to clean up
the locking in the buf and databuf _lo_add() functions so that
we only need to grab the spinlock once. Also we have to move
lock_buffer() around the _lo_add() functions since we can't
do that in gfs2_pin() any more since we hold the spinlock
for the duration of that function.
As a result, the code shrinks by 12 lines and we do far fewer
operations when adding buffers to the log. It also makes the
code somewhat easier to read & understand.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 7ef3356..3ec5871 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,7 +27,71 @@
#include "trans.h"
#include "util.h"
-static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+/**
+ * gfs2_pin - Pin a buffer in memory
+ * @sdp: The superblock
+ * @bh: The buffer to be pinned
+ *
+ * The log lock must be held when calling this function
+ */
+static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+ struct gfs2_bufdata *bd;
+
+ gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
+
+ clear_buffer_dirty(bh);
+ if (test_set_buffer_pinned(bh))
+ gfs2_assert_withdraw(sdp, 0);
+ if (!buffer_uptodate(bh))
+ gfs2_io_error_bh(sdp, bh);
+ bd = bh->b_private;
+ /* If this buffer is in the AIL and it has already been written
+ * to in-place disk block, remove it from the AIL.
+ */
+ if (bd->bd_ail)
+ list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
+ get_bh(bh);
+}
+
+/**
+ * gfs2_unpin - Unpin a buffer
+ * @sdp: the filesystem the buffer belongs to
+ * @bh: The buffer to unpin
+ * @ai:
+ *
+ */
+
+static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
+ struct gfs2_ail *ai)
+{
+ struct ...