> Le mardi 24 août 2010 à 16:27 +0300, Plamen Petrov a écrit :
>
>> The current status: if I enable GRO on the tg3 - the kernel oopses.
>> It just takes a different amount of time to trigger: somewhere from
>> 30 seconds to 30 minutes.
>>
>> The oopses look the same, and here are the latest:
>>
>> [picture 13]
>>
http://picpaste.com/c8dbda8f5c15d9ce3e050dd7f245f5d0.jpg
>>
>> [picture 14]
>>
http://picpaste.com/646cca586b704c5b72d3cf9fa54c7344.jpg
>>
>> I was wondering which debug options could help us track this down?
>>
>
> Thanks, here is an updated patch (against linux-2.6)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 3721fbb..77c8eb7 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1935,6 +1935,32 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
> illegal_highdma(dev, skb))));
> }
>
> +/*
> + * Debug aid: returns 1 (after WARN_ON and a dump of the skb layout, tagged with
> + * @pos) if a CHECKSUM_PARTIAL skb has csum_start - headroom > headlen, else 0.
> + */
> +int skb_csum_start_bug(const struct sk_buff *skb, int pos)
> +{
> + if (skb->ip_summed == CHECKSUM_PARTIAL) {
> + long csstart = skb->csum_start - skb_headroom(skb);
> +
> + if (WARN_ON(csstart > skb_headlen(skb))) {
> + int i;
> +
> + pr_err("%d: csum_start %u, offset %u, headroom %d, headlen %d, len %d\n",
> + pos, skb->csum_start, skb->csum_offset, skb_headroom(skb),
> + skb_headlen(skb), skb->len);
> + pr_err("nr_frags=%u gso_size=%u ",
> + skb_shinfo(skb)->nr_frags, skb_shinfo(skb)->gso_size);
> + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> + pr_cont("frag_size=%u ", skb_shinfo(skb)->frags[i].size); /* append to the nr_frags line */
> + pr_cont("\n"); /* terminate that line; pr_err() here would start a new record */
> + return 1;
> + }
> + }
> + return 0;
> +}
> +
> int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
> struct netdev_queue *txq)
> {
> @@ -1959,11 +1985,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
> goto out_kfree_skb;
> if (skb->next)
> goto gso;
> + if (skb_csum_start_bug(skb, 10))
> + goto out_kfree_skb;
> } else {
> if (skb_needs_linearize(skb, dev) &&
> __skb_linearize(skb))
> goto out_kfree_skb;
>
> + if (skb_csum_start_bug(skb, 20))
> + goto out_kfree_skb;
> /* If packet is not checksummed and device does not
> * support checksumming for this protocol, complete
> * checksumming here.
> @@ -1974,10 +2004,16 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
> if (!dev_can_checksum(dev, skb) &&
> skb_checksum_help(skb))
> goto out_kfree_skb;
> + if (skb_csum_start_bug(skb, 30))
> + goto out_kfree_skb;
> }
> }
>
> - rc = ops->ndo_start_xmit(skb, dev);
> + if (skb_csum_start_bug(skb, 40)) {
> + kfree_skb(skb);
> + rc = NETDEV_TX_OK;
> + } else
> + rc = ops->ndo_start_xmit(skb, dev);
> if (rc == NETDEV_TX_OK)
> txq_trans_update(txq);
> return rc;
> @@ -1997,7 +2033,12 @@ gso:
> if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
> skb_dst_drop(nskb);
>
> - rc = ops->ndo_start_xmit(nskb, dev);
> + if (skb_csum_start_bug(skb, 50)) {
> + kfree_skb(skb);
> + rc = NETDEV_TX_OK;
> + } else
> + rc = ops->ndo_start_xmit(nskb, dev);
> +
> if (unlikely(rc != NETDEV_TX_OK)) {
> if (rc & ~NETDEV_TX_MASK)
> goto out_kfree_gso_skb;
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 3a2513f..3d54a1b 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1824,13 +1824,15 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
> {
> __wsum csum;
> long csstart;
> + extern int skb_csum_start_bug(const struct sk_buff *skb, int pos);
>
> if (skb->ip_summed == CHECKSUM_PARTIAL)
> csstart = skb->csum_start - skb_headroom(skb);
> else
> csstart = skb_headlen(skb);
>
> - BUG_ON(csstart > skb_headlen(skb));
> + if (skb_csum_start_bug(skb, 100))
> + return;
>
> skb_copy_from_linear_data(skb, to, csstart);
>
>
>