[PATCHv2] netns: oops in ip_frag_reasm incrementing stats

Previous thread: Grant by Fondation De France on Friday, March 13, 2009 - 7:42 am. (1 message)

Next thread: [PATCH] 8139too: allow to set mac address on running device by Jiri Pirko on Friday, March 13, 2009 - 10:58 am. (2 messages)
From: Jorge Boncompte [DTI2]
Date: Friday, March 13, 2009 - 9:21 am

skb->dev can be NULL on ip_frag_reasm for skb's coming from RAW sockets.

Quagga's OSPFD sends fragmented packets on a RAW socket, when netfilter
conntrack reassembles them on the OUTPUT path you hit this code path.

Signed-off-by: Jorge Boncompte [DTI2] <jorge@dti2.net>
---
  net/ipv4/ip_fragment.c |   14 +++++++-------
  1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 6659ac0..8f150d5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -84,7 +84,7 @@ int ip_frag_mem(struct net *net)
  	return atomic_read(&net->ipv4.frags.mem);
  }

-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct net *net, struct ipq *qp, struct 
sk_buff *prev,
  			 struct net_device *dev);

  struct ip4_create_arg {
@@ -296,7 +296,7 @@ static int ip_frag_reinit(struct ipq *qp)
  }

  /* Add new segment to existing queue. */
-static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct net *net, struct ipq *qp, struct 
sk_buff *skb)
  {
  	struct sk_buff *prev, *next;
  	struct net_device *dev;
@@ -445,7 +445,7 @@ static int ip_frag_queue(struct ipq *qp, struct 
sk_buff *skb)

  	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
  	    qp->q.meat == qp->q.len)
-		return ip_frag_reasm(qp, prev, dev);
+		return ip_frag_reasm(net, qp, prev, dev);

  	write_lock(&ip4_frags.lock);
  	list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
@@ -460,7 +460,7 @@ err:

  /* Build a new IP datagram from all its fragments. */

-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct net *net, struct ipq *qp, struct 
sk_buff *prev,
  			 struct net_device *dev)
  {
  	struct iphdr *iph;
@@ -548,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct 
sk_buff *prev,
  	iph = ip_hdr(head);
  	iph->frag_off = 0;
  	iph->tot_len = htons(len);
-	IP_INC_STATS_BH(dev_net(dev), ...
From: Jorge Boncompte [DTI2]
Date: Friday, March 13, 2009 - 9:35 am

dev can be NULL on ip_frag_reasm for skb's coming from RAW sockets.

Quagga's OSPFD sends fragmented packets on a RAW socket, when netfilter
conntrack reassembles them on the OUTPUT path you hit this code path.

Changes from v1:
	- Fixed description

Signed-off-by: Jorge Boncompte [DTI2] <jorge@dti2.net>
---
  net/ipv4/ip_fragment.c |   14 +++++++-------
  1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 6659ac0..8f150d5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -84,7 +84,7 @@ int ip_frag_mem(struct net *net)
  	return atomic_read(&net->ipv4.frags.mem);
  }

-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct net *net, struct ipq *qp, struct 
sk_buff *prev,
  			 struct net_device *dev);

  struct ip4_create_arg {
@@ -296,7 +296,7 @@ static int ip_frag_reinit(struct ipq *qp)
  }

  /* Add new segment to existing queue. */
-static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct net *net, struct ipq *qp, struct 
sk_buff *skb)
  {
  	struct sk_buff *prev, *next;
  	struct net_device *dev;
@@ -445,7 +445,7 @@ static int ip_frag_queue(struct ipq *qp, struct 
sk_buff *skb)

  	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
  	    qp->q.meat == qp->q.len)
-		return ip_frag_reasm(qp, prev, dev);
+		return ip_frag_reasm(net, qp, prev, dev);

  	write_lock(&ip4_frags.lock);
  	list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
@@ -460,7 +460,7 @@ err:

  /* Build a new IP datagram from all its fragments. */

-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct net *net, struct ipq *qp, struct 
sk_buff *prev,
  			 struct net_device *dev)
  {
  	struct iphdr *iph;
@@ -548,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct 
sk_buff *prev,
  	iph = ip_hdr(head);
  	iph->frag_off = 0;
  	iph->tot_len = ...
From: Jorge Boncompte [DTI2]
Date: Monday, March 16, 2009 - 5:09 am

dev can be NULL on ip_frag_reasm for skb's coming from RAW sockets.

Quagga's OSPFD sends fragmented packets on a RAW socket, when netfilter
conntrack reassembles them on the OUTPUT path you hit this code path.

Changes from v1:
   - Fixed description

Signed-off-by: Jorge Boncompte [DTI2] <jorge@dti2.net>
---
net/ipv4/ip_fragment.c |   14 +++++++-------
1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 6659ac0..8f150d5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -84,7 +84,7 @@ int ip_frag_mem(struct net *net)
	return atomic_read(&net->ipv4.frags.mem);
}

-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct net *net, struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev);

struct ip4_create_arg {
@@ -296,7 +296,7 @@ static int ip_frag_reinit(struct ipq *qp)
}

/* Add new segment to existing queue. */
-static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct net *net, struct ipq *qp, struct sk_buff *skb)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
@@ -445,7 +445,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)

	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    qp->q.meat == qp->q.len)
-		return ip_frag_reasm(qp, prev, dev);
+		return ip_frag_reasm(net, qp, prev, dev);

	write_lock(&ip4_frags.lock);
	list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
@@ -460,7 +460,7 @@ err:

/* Build a new IP datagram from all its fragments. */

-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct net *net, struct ipq *qp, struct sk_buff *prev,
			 struct net_device *dev)
{
	struct iphdr *iph;
@@ -548,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
	iph = ip_hdr(head);
	iph->frag_off = 0;
	iph->tot_len = htons(len);
-	IP_INC_STATS_BH(dev_net(dev), ...
From: Jarek Poplawski
Date: Monday, March 16, 2009 - 2:05 pm

I didn't check this but isn't something like this possible here too?:

static inline int ip_frag_too_far(struct ipq *qp)
{
	...
        net = container_of(qp->q.net, struct net, ipv4.frags);
        IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);

Jarek P.
--

From: Jorge Boncompte [DTI2]
Date: Monday, March 16, 2009 - 2:53 pm

Yes, it seems so. I did not noticed how the rest of the code accessed
the net pointer, sorry.
	Do you want to send a patch yourself or should I do it?

	Regards,

	Jorge
-- 
==============================================================
Jorge Boncompte - Ingenieria y Gestion de RED
DTI2 - Desarrollo de la Tecnologia de las Comunicaciones
--------------------------------------------------------------
C/ Abogado Enriquez Barrios, 5   14004 CORDOBA (SPAIN)
Tlf: +34 957 761395 / FAX: +34 957 450380
==============================================================
- Sin pistachos no hay Rock & Roll...
- Without wicker a basket cannot be made.
==============================================================

--

From: Jarek Poplawski
Date: Monday, March 16, 2009 - 3:05 pm

You did the whole work so I hope you'll finish this (after checking
and maybe some testing)!

Regards,
Jarek P.
--

From: Jarek Poplawski
Date: Monday, March 16, 2009 - 3:46 pm

On Mon, Mar 16, 2009 at 10:53:14PM +0100, Jorge Boncompte [DTI2] wrote:

BTW, it seems you're expected to this for ipv6 too. ;-)

Jarek P.
--

From: Jorge Boncompte [DTI2]
Date: Tuesday, March 17, 2009 - 4:55 am

dev can be NULL in ip[6]_frag_reasm for skb's coming from RAW sockets.

Quagga's OSPFD sends fragmented packets on a RAW socket, when netfilter
conntrack reassembles them on the OUTPUT path you hit this code path.

You can test it with something like "hping2 -0 -d 2000 -f AA.BB.CC.DD"

Changes from v2: (address comments from Jarek Poplawski)
	- Patch reworked to get the net pointer with container_of()
	  instead of passing it to function calls.
	- Fix IPv6 code
Changes from v1:
	- Fixed description

Signed-off-by: Jorge Boncompte [DTI2] <jorge@dti2.net>
---
 net/ipv4/ip_fragment.c |    5 +++--
 net/ipv6/reassembly.c  |    7 +++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 6659ac0..4806e33 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -463,6 +463,7 @@ err:
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			 struct net_device *dev)
 {
+	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 	struct iphdr *iph;
 	struct sk_buff *fp, *head = qp->q.fragments;
 	int len;
@@ -548,7 +549,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
-	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
 	return 0;
 
@@ -562,7 +563,7 @@ out_oversize:
 		printk(KERN_INFO "Oversized IP packet from %pI4.\n",
 			&qp->saddr);
 out_fail:
-	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS);
+	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 	return err;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3c57511..e9ac7a1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -452,6 +452,7 @@ err:
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			  struct net_device *dev)
 {
+	struct net *net = container_of(fq->q.net, struct net, ...
From: Jarek Poplawski
Date: Tuesday, March 17, 2009 - 6:21 am

I guess David will be interested only with the final state of changes,
so v1 & v2 are not necessary here...

Anyway, ipv4 looks OK to me, but ipv6 looks like something is

It still depends on dev != NULL in __in6_dev_get(). I see there
is also used skb->dst for similar things in ip6_frag_queue(), so I
don't know: it needs rethinking, and maybe these patches should be
separated if you prefer.

Thanks,
--

From: Jorge Boncompte [DTI2]
Date: Tuesday, March 17, 2009 - 6:54 am

Not my day! :-) I should not look at code at 2 am and write patches
the day after, I confused _idev and idev in the check for != NULL in _DEVINC.

	I think this bug was first introduced by patch "[IPV6]: Per-interface
statistics support." from YOSHIFUJI Hideaki on Nov 4, 2006.

	If someone with more knowledge could confirm that using something like...

"(skb->dev ? skb->dev : skb->dst->dev)"

... here is fine I'll redo this part and resend. I do not have an IPv6 setup where
I can test this.

	Regards,

	Jorge
-- 
==============================================================
Jorge Boncompte - Ingenieria y Gestion de RED
DTI2 - Desarrollo de la Tecnologia de las Comunicaciones
--------------------------------------------------------------
C/ Abogado Enriquez Barrios, 5   14004 CORDOBA (SPAIN)
Tlf: +34 957 761395 / FAX: +34 957 450380
==============================================================
- Sin pistachos no hay Rock & Roll...
- Without wicker a basket cannot be made.
==============================================================

--

From: Jarek Poplawski
Date: Wednesday, March 18, 2009 - 12:26 am

skb->dst->dev is used in ip6_frag_queue anyway, so it shouldn't be
worse if you do this similarly in ip6_frag_reasm. If you send it as
a separate patch, and write it's not tested, David will decide if he
wants it. Otherwise you can resend this ipv4 patch only.

Jarek P.
--

From: David Miller
Date: Wednesday, March 18, 2009 - 11:26 pm

From: Jarek Poplawski <jarkao2@gmail.com>

Netfilter on ipv6 handles reassembly differently and in a way
that won't result in dev being NULL here.

I've applied Jorge's patch, thanks everyone.
--

From: Jarek Poplawski
Date: Thursday, March 19, 2009 - 2:54 pm

My proposal is to revert the ipv6/reassembly part of this patch yet;
it doesn't fix anything, and the changelog is only misleading if it's
like you said.

Jarek P.
--

From: David Miller
Date: Thursday, March 19, 2009 - 2:56 pm

From: Jarek Poplawski <jarkao2@gmail.com>

I've already pushed the change out to net-2.6 so I'd really prefer not
to do that.  Make your desires known more emphatically before a patch
has been sitting around for days.

At worst it's a consistency cleanup with an inaccurate commit message,
reverting will only make the situation more confusing and ugly.
--

From: Jarek Poplawski
Date: Thursday, March 19, 2009 - 3:09 pm

I guess you should re-read the thread: if not me, the author of this

Sure, no big deal.

Jarek P.
--

From: David Miller
Date: Friday, March 13, 2009 - 11:46 am

From: "Jorge Boncompte [DTI2]" <jorge@dti2.net>

Your patch was corrupted by your email client, please fix this up and
resubmit.
--

Previous thread: Grant by Fondation De France on Friday, March 13, 2009 - 7:42 am. (1 message)

Next thread: [PATCH] 8139too: allow to set mac address on running device by Jiri Pirko on Friday, March 13, 2009 - 10:58 am. (2 messages)