Re: race in skb_splice_bits?

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Evgeniy Polyakov
Date: Tuesday, May 27, 2008 - 8:09 am

On Tue, May 27, 2008 at 05:39:11PM +0300, Octavian Purdila (opurdila@ixiacom.com) wrote:

Yes, that was somewhat stupid patch.
Since I can not reproduce it anymore, let's try to upload this task to you :)
Please test this one assuming there are no other patches applied before.

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6087013..e2a99a6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1295,7 +1295,7 @@ err:
  * the frag list, if such a thing exists. We'd probably need to recurse to
  * handle that cleanly.
  */
-int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
+int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
 		    unsigned int flags)
 {
@@ -1308,16 +1308,6 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
 		.ops = &sock_pipe_buf_ops,
 		.spd_release = sock_spd_release,
 	};
-	struct sk_buff *skb;
-
-	/*
-	 * I'd love to avoid the clone here, but tcp_read_sock()
-	 * ignores reference counts and unconditonally kills the sk_buff
-	 * on return from the actor.
-	 */
-	skb = skb_clone(__skb, GFP_KERNEL);
-	if (unlikely(!skb))
-		return -ENOMEM;
 
 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
@@ -1341,14 +1331,9 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
 	}
 
 done:
-	/*
-	 * drop our reference to the clone, the pipe consumption will
-	 * drop the rest.
-	 */
-	kfree_skb(skb);
-
 	if (spd.nr_pages) {
 		int ret;
+		struct sock *sk = __skb->sk;
 
 		/*
 		 * Drop the socket lock, otherwise we have reverse
@@ -1359,9 +1344,9 @@ done:
 		 * we call into ->sendpage() with the i_mutex lock held
 		 * and networking will grab the socket lock.
 		 */
-		release_sock(__skb->sk);
+		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
-		lock_sock(__skb->sk);
+		lock_sock(sk);
 		return ret;
 	}
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39b629a..54bb460 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1196,7 +1196,7 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		  sk_read_actor_t recv_actor)
 {
-	struct sk_buff *skb;
+	struct sk_buff *skb, *__skb;
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 seq = tp->copied_seq;
 	u32 offset;
@@ -1204,10 +1204,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 
 	if (sk->sk_state == TCP_LISTEN)
 		return -ENOTCONN;
-	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+	while ((__skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
 		if (offset < skb->len) {
 			size_t used, len;
 
+			skb = skb_clone(__skb, GFP_KERNEL);
+			if (unlikely(!skb))
+				break;
+
 			len = skb->len - offset;
 			/* Stop reading if we hit a patch of urgent data */
 			if (tp->urg_data) {
@@ -1215,29 +1219,35 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 				if (urg_offset < len)
 					len = urg_offset;
 				if (!len)
-					break;
+					goto out_break;
 			}
 			used = recv_actor(desc, skb, offset, len);
 			if (used < 0) {
 				if (!copied)
 					copied = used;
-				break;
+				goto out_break;
 			} else if (used <= len) {
 				seq += used;
 				copied += used;
 				offset += used;
 			}
 			if (offset != skb->len)
-				break;
+				goto out_break;
 		}
 		if (tcp_hdr(skb)->fin) {
 			sk_eat_skb(sk, skb, 0);
 			++seq;
-			break;
+			goto out_break;
 		}
 		sk_eat_skb(sk, skb, 0);
+out:
+		kfree_skb(skb);
 		if (!desc->count)
-			break;
+			goto out_break;
+		continue;
+out_break:
+		kfree_skb(skb);
+		break;
 	}
 	tp->copied_seq = seq;
 


-- 
	Evgeniy Polyakov
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
race in skb_splice_bits?, Octavian Purdila, (Mon May 26, 5:25 pm)
Re: race in skb_splice_bits?, Ben Hutchings, (Mon May 26, 7:08 pm)
Re: race in skb_splice_bits?, Octavian Purdila, (Tue May 27, 3:41 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 4:01 am)
Re: race in skb_splice_bits?, Ben Hutchings, (Tue May 27, 4:08 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 4:52 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 4:56 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Tue May 27, 5:53 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 6:21 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 7:03 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Tue May 27, 7:39 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 8:09 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 8:12 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 8:22 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Tue May 27, 8:33 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 8:47 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Tue May 27, 10:28 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Tue May 27, 4:59 pm)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Wed May 28, 1:52 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Wed May 28, 6:20 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Wed May 28, 7:11 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Wed May 28, 8:20 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Wed May 28, 8:42 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Wed May 28, 10:08 am)
Re: race in skb_splice_bits?, Evgeniy Polyakov, (Wed May 28, 10:51 am)
Re: race in skb_splice_bits?, Octavian Purdila, (Wed May 28, 11:02 am)
Re: race in skb_splice_bits?, Jarek Poplawski, (Wed May 28, 1:01 pm)
Re: race in skb_splice_bits?, Octavian Purdila, (Wed May 28, 1:09 pm)
Re: race in skb_splice_bits?, Jarek Poplawski, (Wed May 28, 1:16 pm)