Yes, I tested all cases except for the following case (related to
PATCH 4/5):
I triggered journal_flush(), __log_wait_for_space() and a write error
at the same time, but I could not confirm that the race actually
occurred.
I found these problems by reading the source code, then tested them
with a fault injection approach. To inject a fault, I used the
SystemTap script at the bottom of this mail.
What I really want is to avoid writing the commit record when
metadata buffers could not be written to the journal.
A failure to write revoke records should not be a problem,
because the recovery process detects the invalid control block by
its noncontiguous sequence number.
But it is nonsense to write the commit record even though we failed
to write control blocks to the journal. So I think it makes sense
to catch errors for all writes to the journal here and abort the
journal to avoid writing the commit record.
* * * * * *
The following SystemTap script was used to inject a fault.
Please don't use this script without modifying it; it is hard-coded
for my environment.
/*
 * Block number of the metadata buffer to target. It is compared against
 * $jh_in->b_bh->b_blocknr in the journal_write_metadata_buffer probe.
 * The value is specific to the author's environment; adjust before use.
 */
global target_inode_block = 64
/*
* Inject a fault when a particular metadata buffer is journaled.
*/
%{
/* Embedded C section: kernel headers used by the embedded-C code in this script. */
#include <linux/buffer_head.h>
#include <linux/jbd.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
/*
 * Extra buffer-head state bit placed one past BH_Unshadow.
 * NOTE(review): BH_Faulty is not referenced anywhere else in the visible
 * script -- presumably a leftover; confirm before relying on it.
 */
enum fi_state_bits {
BH_Faulty = BH_Unshadow + 1,
};
%}
/*
 * fault_inject - corrupt a SCSI command in place.
 * @scmd: address of a struct scsi_cmnd, passed from the probe as a long.
 *
 * Sets the three most significant bits of the first CDB byte and sets the
 * CDB length to 255. Presumably this makes the command invalid so that it
 * fails with an I/O error -- confirm against the target device/driver.
 */
function fault_inject (scmd: long) %{
	/* Turn the integer argument back into a command pointer. */
	struct scsi_cmnd *victim = (void *)((unsigned long)THIS->scmd);

	victim->cmd_len = 255;
	victim->cmnd[0] |= 0xe0;	/* same mask as 7 << 5 */
%}
/*
 * Per-thread flag keyed by tid(): set to 1 on entry to
 * journal_write_metadata_buffer() for the targeted block and reset to 0
 * when that function returns.
 */
global do_fault_inject
/*
 * Sector numbers marked for fault injection. Entries are added by the
 * journal_file_buffer probe and removed by the scsi_dispatch_cmd probe
 * once a fault has been injected.
 */
global faulty_sector
/*
 * On entry to journal_write_metadata_buffer(): if the buffer being
 * journaled is the targeted block, arm fault injection for this thread.
 */
probe module("jbd").function("journal_write_metadata_buffer") {
	blocknr = $jh_in->b_bh->b_blocknr
	if (blocknr == target_inode_block)
		do_fault_inject[tid()] = 1
}
/*
 * On return from journal_write_metadata_buffer(): disarm fault injection
 * for this thread, so only journal_file_buffer() calls made while the
 * function was running can mark a sector as faulty.
 */
probe module("jbd").function("journal_write_metadata_buffer").return {
do_fault_inject[tid()] = 0
}
/*
 * While fault injection is armed for this thread, record the disk sector
 * of any buffer that is filed on journal list 4 (BJ_IO).
 *
 * The block-to-sector conversion (* 8 + 63) is hard-coded; presumably it
 * assumes 8 sectors per block and a partition starting at sector 63 --
 * verify for your own setup.
 */
probe module("jbd").function("journal_file_buffer") {
	if (!do_fault_inject[tid()] || $jlist != 4 /* BJ_IO */)
		next

	sector = $jh->b_bh->b_blocknr * 8 + 63
	faulty_sector[sector] = 1
	printf("mark faulty @ sector=%d\n", sector)
}
/*
 * Trace SCSI commands dispatched to target id 1 and corrupt the first
 * one that has bit 0 of cmd_flags set (presumably the write flag --
 * confirm for your kernel) and starts at a sector recorded in
 * faulty_sector. The sector entry is deleted when the fault is injected,
 * so each marked sector triggers at most one fault.
 */
probe kernel.function("scsi_dispatch_cmd") {
	/* Filter on the target first so other devices are never dereferenced further. */
	id = $cmd->device->id
	if (id != 1) {
		next
	}

	/*
	 * A command that carries no data has no bio attached; skip it rather
	 * than let the bi_sector read below fault on a NULL pointer.
	 */
	if ($cmd->request->bio == 0) {
		next
	}

	host = $cmd->device->host->host_no
	lun = $cmd->device->lun
	ch = $cmd->device->channel
	sector = $cmd->request->bio->bi_sector
	len = $cmd->transfersize / 512

	printf("%d:%d:%d:%d, #%d+%d\n", host, ch, id, lun, sector, len)

	/*
	 * Parenthesised explicitly: without the parentheses the test parses
	 * as cmd_flags & (1 == 1), which only works because the mask is 1.
	 */
	flagged = ($cmd->request->cmd_flags & 1) == 1
	if (flagged && faulty_sector[sector]) {
		delete faulty_sector[sector]
		fault_inject($cmd)
		printf("fault injected\n")
	}
}
--
Hidehiro Kawai
Hitachi, Systems Development Laboratory
Linux Technology Center
--