On Mon, 2010-02-08 at 10:55 -0800, Sridhar Samudrala wrote:
I've encountered some more problems: the various users of
macvtap_file_get_queue() are inconsistent about calling
macvtap_file_put_queue() in their error paths; some call it and others don't.
I modified your patch so that macvtap_file_get_queue() calls
rcu_read_unlock_bh() itself when it returns NULL, and updated its users
accordingly.
This patch also incorporates my preemption fix for macvlan_count_rx().
Signed-off-by: Ed Swierk <eswierk@aristanetworks.com>
---
On Mon, 2010-02-08 at 09:14 -0800, Ed Swierk wrote:
I am also seeing this issue with net-next-2.6.
Basically, macvtap_put_user() and macvtap_get_user() end up calling
copy_to_user()/copy_from_user() from within an RCU read-side critical section.
The following patch fixes this issue by releasing the RCU read lock before
calling these routines and holding a reference to q->sk instead.
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Index: linux-2.6.29.6/drivers/net/macvtap.c
===================================================================
--- linux-2.6.29.6.orig/drivers/net/macvtap.c
+++ linux-2.6.29.6/drivers/net/macvtap.c
@@ -160,8 +160,12 @@ static void macvtap_del_queues(struct ne
static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file)
{
+ struct macvtap_queue *q;
rcu_read_lock_bh();
- return rcu_dereference(file->private_data);
+ q = rcu_dereference(file->private_data);
+ if (!q)
+ rcu_read_unlock_bh();
+ return q;
}
static inline void macvtap_file_put_queue(void)
@@ -313,13 +317,14 @@ static unsigned int macvtap_poll(struct
sock_writeable(&q->sk)))
mask |= POLLOUT | POLLWRNORM;
-out:
macvtap_file_put_queue();
+
+out:
return mask;
}
/* Get packet from user space buffer */
-static ssize_t macvtap_get_user(struct macvtap_queue *q,
+static ssize_t macvtap_get_user(struct macvlan_dev *vlan, struct sock *sk,
struct iovec *iv, size_t count,
int noblock)
{
@@ -330,10 +335,10 @@ static ssize_t macvtap_get_user(struct m
if (unlikely(len < ETH_HLEN))
return -EINVAL;
- skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err);
+ skb = sock_alloc_send_skb(sk, NET_IP_ALIGN + len, noblock, &err);
if (!skb) {
- macvlan_count_rx(q->vlan, 0, false, false);
+ macvlan_count_rx(vlan, 0, false, false);
return err;
}
@@ -341,14 +346,14 @@ static ssize_t macvtap_get_user(struct m
skb_put(skb, count);
if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
- macvlan_count_rx(q->vlan, 0, false, false);
+ macvlan_count_rx(vlan, 0, false, false);
kfree_skb(skb);
return -EFAULT;
}
skb_set_network_header(skb, ETH_HLEN);
- macvlan_start_xmit(skb, q->vlan->dev);
+ macvlan_start_xmit(skb, vlan->dev);
return count;
}
@@ -359,23 +364,29 @@ static ssize_t macvtap_aio_write(struct
struct file *file = iocb->ki_filp;
ssize_t result = -ENOLINK;
struct macvtap_queue *q = macvtap_file_get_queue(file);
+ struct macvlan_dev *vlan;
+ struct sock *sk;
if (!q)
goto out;
- result = macvtap_get_user(q, (struct iovec *) iv, iov_length(iv, count),
+ vlan = q->vlan;
+ sk = &q->sk;
+ sock_hold(sk);
+ macvtap_file_put_queue();
+
+ result = macvtap_get_user(vlan, sk, (struct iovec *) iv, iov_length(iv, count),
file->f_flags & O_NONBLOCK);
+ sock_put(sk);
out:
- macvtap_file_put_queue();
return result;
}
/* Put packet to the user space buffer */
-static ssize_t macvtap_put_user(struct macvtap_queue *q,
+static ssize_t macvtap_put_user(struct macvlan_dev *vlan,
struct sk_buff *skb,
struct iovec *iv, int len)
{
- struct macvlan_dev *vlan = q->vlan;
int ret;
len = min_t(int, skb->len, len);
@@ -392,15 +403,20 @@ static ssize_t macvtap_aio_read(struct k
{
struct file *file = iocb->ki_filp;
struct macvtap_queue *q = macvtap_file_get_queue(file);
+ struct macvlan_dev *vlan;
+ struct sock *sk;
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
ssize_t len, ret = 0;
- if (!q) {
- ret = -ENOLINK;
- goto out;
- }
+ if (!q)
+ return -ENOLINK;
+
+ vlan = q->vlan;
+ sk = &q->sk;
+ sock_hold(sk);
+ macvtap_file_put_queue();
len = iov_length(iv, count);
if (len < 0) {
@@ -408,12 +424,12 @@ static ssize_t macvtap_aio_read(struct k
goto out;
}
- add_wait_queue(q->sk.sk_sleep, &wait);
+ add_wait_queue(sk->sk_sleep, &wait);
while (len) {
current->state = TASK_INTERRUPTIBLE;
/* Read frames from the queue */
- skb = skb_dequeue(&q->sk.sk_receive_queue);
+ skb = skb_dequeue(&sk->sk_receive_queue);
if (!skb) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
@@ -427,16 +443,16 @@ static ssize_t macvtap_aio_read(struct k
schedule();
continue;
}
- ret = macvtap_put_user(q, skb, (struct iovec *) iv, len);
+ ret = macvtap_put_user(vlan, skb, (struct iovec *) iv, len);
kfree_skb(skb);
break;
}
current->state = TASK_RUNNING;
- remove_wait_queue(q->sk.sk_sleep, &wait);
+ remove_wait_queue(sk->sk_sleep, &wait);
out:
- macvtap_file_put_queue();
+ sock_put(sk);
return ret;
}
Index: linux-2.6.29.6/include/linux/if_macvlan.h
===================================================================
--- linux-2.6.29.6.orig/include/linux/if_macvlan.h
+++ linux-2.6.29.6/include/linux/if_macvlan.h
@@ -42,8 +42,9 @@ static inline void macvlan_count_rx(cons
bool multicast)
{
struct macvlan_rx_stats *rx_stats;
+ int cpu = get_cpu();
- rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id());
+ rx_stats = per_cpu_ptr(vlan->rx_stats, cpu);
if (likely(success)) {
rx_stats->rx_packets++;;
rx_stats->rx_bytes += len;
@@ -52,6 +53,7 @@ static inline void macvlan_count_rx(cons
} else {
rx_stats->rx_errors++;
}
+ put_cpu();
}
extern int macvlan_common_newlink(struct net_device *dev,
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to
majordomo@vger.kernel.org
More majordomo info at
http://vger.kernel.org/majordomo-info.html