[34-longterm 013/260] ext4: Do not zero out uninitialized extents beyond i_size

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Paul Gortmaker
Date: Sunday, January 2, 2011 - 12:15 am

From: Dmitry Monakhov <dmonakhov@openvz.org>

commit 21ca087a3891efab4d45488db8febee474d26c68 upstream.

The extents code will sometimes zero out blocks and mark them as
initialized instead of splitting an extent into several smaller ones.
This optimization, however, causes problems if the extent is beyond
i_size because fsck will complain if there are uninitialized blocks
after i_size, as this cannot be distinguished from an inode that has
an incorrect i_size field.

https://bugzilla.kernel.org/show_bug.cgi?id=15742

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 fs/ext4/extents.c |   67 ++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 228eeaf..ee611da 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2631,11 +2631,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
 	int ret = 0;
+	int may_zeroout;
+
+	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+		"block %llu, max_blocks %u\n", inode->i_ino,
+		(unsigned long long)iblock, max_blocks);
+
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+	if (eof_block < iblock + max_blocks)
+		eof_block = iblock + max_blocks;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
@@ -2644,16 +2654,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
+	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully inside i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= eof_block;
+
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
 		goto out;
 	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2684,7 +2701,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	if (allocated > max_blocks) {
 		unsigned int newdepth;
 		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-		if (allocated <= EXT4_EXT_ZERO_LEN) {
+		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
 			/*
 			 * iblock == ee_block is handled by the zerouout
 			 * at the beginning.
@@ -2760,7 +2777,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2784,8 +2801,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * update the extent length after successful insert of the
 		 * split extent
 		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+		ee_len -= ext4_ext_get_actual_len(ex3);
+		orig_ex.ee_len = cpu_to_le16(ee_len);
+		may_zeroout = ee_block + ee_len <= eof_block;
+
 		depth = newdepth;
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode, iblock, path);
@@ -2809,7 +2828,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		 * otherwise give the extent a chance to merge to left
 		 */
 		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-							iblock != ee_block) {
+			iblock != ee_block && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -2878,7 +2897,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
@@ -2938,14 +2957,21 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	struct ext4_extent *ex2 = NULL;
 	struct ext4_extent *ex3 = NULL;
 	struct ext4_extent_header *eh;
-	ext4_lblk_t ee_block;
+	ext4_lblk_t ee_block, eof_block;
 	unsigned int allocated, ee_len, depth;
 	ext4_fsblk_t newblock;
 	int err = 0;
+	int may_zeroout;
+
+	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
+		"block %llu, max_blocks %u\n", inode->i_ino,
+		(unsigned long long)iblock, max_blocks);
+
+	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+		inode->i_sb->s_blocksize_bits;
+	if (eof_block < iblock + max_blocks)
+		eof_block = iblock + max_blocks;
 
-	ext_debug("ext4_split_unwritten_extents: inode %lu,"
-		  "iblock %llu, max_blocks %u\n", inode->i_ino,
-		  (unsigned long long)iblock, max_blocks);
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
@@ -2953,12 +2979,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (iblock - ee_block);
 	newblock = iblock - ee_block + ext_pblock(ex);
+
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
 	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
 	/*
+	 * It is safe to convert extent to initialized via explicit
+	 * zeroout only if extent is fully inside i_size or new_size.
+	 */
+	may_zeroout = ee_block + ee_len <= eof_block;
+
+	/*
  	 * If the uninitialized extent begins at the same logical
  	 * block where the write begins, and the write completely
  	 * covers the extent, then we don't need to split it.
@@ -2992,7 +3025,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
 		ext4_ext_mark_uninitialized(ex3);
 		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-		if (err == -ENOSPC) {
+		if (err == -ENOSPC && may_zeroout) {
 			err =  ext4_ext_zeroout(inode, &orig_ex);
 			if (err)
 				goto fix_extent_len;
@@ -3016,8 +3049,10 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 		 * update the extent length after successful insert of the
 		 * split extent
 		 */
-		orig_ex.ee_len = cpu_to_le16(ee_len -
-						ext4_ext_get_actual_len(ex3));
+		ee_len -= ext4_ext_get_actual_len(ex3);
+		orig_ex.ee_len = cpu_to_le16(ee_len);
+		may_zeroout = ee_block + ee_len <= eof_block;
+
 		depth = newdepth;
 		ext4_ext_drop_refs(path);
 		path = ext4_ext_find_extent(inode, iblock, path);
@@ -3063,7 +3098,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	goto out;
 insert:
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-	if (err == -ENOSPC) {
+	if (err == -ENOSPC && may_zeroout) {
 		err =  ext4_ext_zeroout(inode, &orig_ex);
 		if (err)
 			goto fix_extent_len;
-- 
1.7.3.3

--
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[34-longterm 000/260] v2.6.34.8 longterm review, Paul Gortmaker, (Sun Jan 2, 12:14 am)
[34-longterm 003/260] ath5k: drop warning on jumbo frames, Paul Gortmaker, (Sun Jan 2, 12:14 am)
[34-longterm 013/260] ext4: Do not zero out uninitialized ..., Paul Gortmaker, (Sun Jan 2, 12:15 am)
[34-longterm 019/260] ext4: Show journal_checksum option, Paul Gortmaker, (Sun Jan 2, 12:15 am)
[34-longterm 025/260] ext4: Fix compat EXT4_IOC_ADD_GROUP, Paul Gortmaker, (Sun Jan 2, 12:15 am)
[34-longterm 028/260] ext4: fix freeze deadlock under IO, Paul Gortmaker, (Sun Jan 2, 12:15 am)
[34-longterm 030/260] xen: handle events as edge-triggered, Paul Gortmaker, (Sun Jan 2, 12:15 am)
[34-longterm 045/260] USB: ehci-ppc-of: problems in unwind, Paul Gortmaker, (Sun Jan 2, 12:15 am)
[34-longterm 047/260] USB: CP210x Add new device ID, Paul Gortmaker, (Sun Jan 2, 12:15 am)
[34-longterm 065/260] irda: off by one, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 089/260] sched: Optimize task_rq_lock(), Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 090/260] sched: Fix nr_uninterruptible count, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 093/260] sched: Fix select_idle_sibling(), Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 098/260] arm: fix really nasty sigreturn bug, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 106/260] drm/i915: Prevent double dpms on, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 110/260] gro: fix different skb headrooms, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 111/260] gro: Re-fix different skb headrooms, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 115/260] tcp: fix three tcp sysctls tuning, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 118/260] rds: fix a leak of kernel memory, Paul Gortmaker, (Sun Jan 2, 12:16 am)
[34-longterm 126/260] Staging: vt6655: fix buffer overflow, Paul Gortmaker, (Sun Jan 2, 12:17 am)
[34-longterm 134/260] percpu: fix pcpu_last_unit_cpu, Paul Gortmaker, (Sun Jan 2, 12:17 am)
[34-longterm 136/260] inotify: send IN_UNMOUNT events, Paul Gortmaker, (Sun Jan 2, 12:17 am)
[34-longterm 139/260] fix siglock, Paul Gortmaker, (Sun Jan 2, 12:17 am)
[34-longterm 145/260] AT91: change dma resource index, Paul Gortmaker, (Sun Jan 2, 12:17 am)
[34-longterm 154/260] inotify: fix inotify oneshot support, Paul Gortmaker, (Sun Jan 2, 12:17 am)
[34-longterm 158/260] alpha: Fix printk format errors, Paul Gortmaker, (Sun Jan 2, 12:17 am)
[34-longterm 188/260] atl1: fix resume, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 190/260] De-pessimize rds_page_copy_user, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 192/260] xfrm4: strip ECN bits from tos field, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 193/260] tcp: Fix >4GB writes on 64-bit., Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 199/260] tcp: Fix race in tcp_poll, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 200/260] netxen: dont set skb->truesize, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 203/260] skge: add quirk to limit DMA, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 208/260] b44: fix carrier detection on bind, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 224/260] bluetooth: Fix missing NULL check, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 236/260] KVM: x86: Fix SVM VMCB reset, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 240/260] p54usb: fix off-by-one on !CONFIG_PM, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 241/260] p54usb: add five more USBIDs, Paul Gortmaker, (Sun Jan 2, 12:18 am)
[34-longterm 256/260] libsas: fix NCQ mixing with non-NCQ, Paul Gortmaker, (Sun Jan 2, 12:19 am)
[34-longterm 257/260] gdth: integer overflow in ioctl, Paul Gortmaker, (Sun Jan 2, 12:19 am)
[34-longterm 258/260] Fix race when removing SCSI devices, Paul Gortmaker, (Sun Jan 2, 12:19 am)
Re: [34-longterm 000/260] v2.6.34.8 longterm review, Paul Gortmaker, (Sun Jan 2, 3:46 am)
Re: [34-longterm 000/260] v2.6.34.8 longterm review, Jiri Slaby, (Mon Jan 3, 3:41 am)
Re: [34-longterm 000/260] v2.6.34.8 longterm review, Paul Gortmaker, (Tue Jan 4, 12:11 pm)