Kenny Chang a écrit :
Hi Kenny
I am investigating how to reduce contention (and schedule() calls) on this workload.
The following patch already gave me fewer packet drops (but it is not yet *perfect*):
10% packet loss instead of 30%, with 8 receivers on my 8-CPU machine.
David, this is preliminary work, not meant for inclusion as is;
comments are welcome.
Thank you
[PATCH] net: sk_forward_alloc becomes an atomic_t
Commit 95766fff6b9a78d11fc2d3812dd035381690b55d
(UDP: Add memory accounting) introduced a regression for high-rate UDP flows,
because of the extra lock_sock() in udp_recvmsg().
In order to reduce the need for lock_sock() in the UDP receive path, we might need
to declare sk_forward_alloc as an atomic_t.
With that change, udp_recvmsg() can avoid a lock_sock()/release_sock() pair.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
include/net/sock.h | 14 +++++++-------
net/core/sock.c | 31 +++++++++++++++++++------------
net/core/stream.c | 2 +-
net/ipv4/af_inet.c | 2 +-
net/ipv4/inet_diag.c | 2 +-
net/ipv4/tcp_input.c | 2 +-
net/ipv4/udp.c | 2 --
net/ipv6/udp.c | 2 --
net/sched/em_meta.c | 2 +-
9 files changed, 31 insertions(+), 28 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 4bb1ff9..c4befb9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -250,7 +250,7 @@ struct sock {
struct sk_buff_head sk_async_wait_queue;
#endif
int sk_wmem_queued;
- int sk_forward_alloc;
+ atomic_t sk_forward_alloc;
gfp_t sk_allocation;
int sk_route_caps;
int sk_gso_type;
@@ -823,7 +823,7 @@ static inline int sk_wmem_schedule(struct sock *sk, int size)
{
if (!sk_has_account(sk))
return 1;
- return size <= sk->sk_forward_alloc ||
+ return size <= atomic_read(&sk->sk_forward_alloc) ||
__sk_mem_schedule(sk, size, SK_MEM_SEND);
}
@@ -831,7 +831,7 @@ static inline int sk_rmem_schedule(struct sock *sk, int size)
{
if (!sk_has_account(sk))
return 1;
- return size <= sk->sk_forward_alloc ||
+ return size <= atomic_read(&sk->sk_forward_alloc) ||
__sk_mem_schedule(sk, size, SK_MEM_RECV);
}
@@ -839,7 +839,7 @@ static inline void sk_mem_reclaim(struct sock *sk)
{
if (!sk_has_account(sk))
return;
- if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
+ if (atomic_read(&sk->sk_forward_alloc) >= SK_MEM_QUANTUM)
__sk_mem_reclaim(sk);
}
@@ -847,7 +847,7 @@ static inline void sk_mem_reclaim_partial(struct sock *sk)
{
if (!sk_has_account(sk))
return;
- if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
+ if (atomic_read(&sk->sk_forward_alloc) > SK_MEM_QUANTUM)
__sk_mem_reclaim(sk);
}
@@ -855,14 +855,14 @@ static inline void sk_mem_charge(struct sock *sk, int size)
{
if (!sk_has_account(sk))
return;
- sk->sk_forward_alloc -= size;
+ atomic_sub(size, &sk->sk_forward_alloc);
}
static inline void sk_mem_uncharge(struct sock *sk, int size)
{
if (!sk_has_account(sk))
return;
- sk->sk_forward_alloc += size;
+ atomic_add(size, &sk->sk_forward_alloc);
}
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
diff --git a/net/core/sock.c b/net/core/sock.c
index 0620046..8489105 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1081,7 +1081,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_cache = NULL;
newsk->sk_wmem_queued = 0;
- newsk->sk_forward_alloc = 0;
+ atomic_set(&newsk->sk_forward_alloc, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
@@ -1479,7 +1479,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
int amt = sk_mem_pages(size);
int allocated;
- sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+ atomic_add(amt * SK_MEM_QUANTUM, &sk->sk_forward_alloc);
allocated = atomic_add_return(amt, prot->memory_allocated);
/* Under limit. */
@@ -1520,7 +1520,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
if (prot->sysctl_mem[2] > alloc *
sk_mem_pages(sk->sk_wmem_queued +
atomic_read(&sk->sk_rmem_alloc) +
- sk->sk_forward_alloc))
+ atomic_read(&sk->sk_forward_alloc)))
return 1;
}
@@ -1537,7 +1537,7 @@ suppress_allocation:
}
/* Alas. Undo changes. */
- sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
+ atomic_sub(amt * SK_MEM_QUANTUM, &sk->sk_forward_alloc);
atomic_sub(amt, prot->memory_allocated);
return 0;
}
@@ -1551,14 +1551,21 @@ EXPORT_SYMBOL(__sk_mem_schedule);
void __sk_mem_reclaim(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
-
- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
- prot->memory_allocated);
- sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
-
- if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
- *prot->memory_pressure = 0;
+ int val = atomic_read(&sk->sk_forward_alloc);
+
+begin:
+ val = atomic_read(&sk->sk_forward_alloc);
+ if (val >= SK_MEM_QUANTUM) {
+ if (atomic_cmpxchg(&sk->sk_forward_alloc, val,
+ val & (SK_MEM_QUANTUM - 1)) != val)
+ goto begin;
+ atomic_sub(val >> SK_MEM_QUANTUM_SHIFT,
+ prot->memory_allocated);
+
+ if (prot->memory_pressure && *prot->memory_pressure &&
+ (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ *prot->memory_pressure = 0;
+ }
}
EXPORT_SYMBOL(__sk_mem_reclaim);
diff --git a/net/core/stream.c b/net/core/stream.c
index 8727cea..4d04d28 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -198,7 +198,7 @@ void sk_stream_kill_queues(struct sock *sk)
sk_mem_reclaim(sk);
WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
+ WARN_ON(atomic_read(&sk->sk_forward_alloc));
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 627be4d..7a1475c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -152,7 +152,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
+ WARN_ON(atomic_read(&sk->sk_forward_alloc));
kfree(inet->opt);
dst_release(sk->sk_dst_cache);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 588a779..903ad66 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -158,7 +158,7 @@ static int inet_csk_diag_fill(struct sock *sk,
if (minfo) {
minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc);
minfo->idiag_wmem = sk->sk_wmem_queued;
- minfo->idiag_fmem = sk->sk_forward_alloc;
+ minfo->idiag_fmem = atomic_read(&sk->sk_forward_alloc);
minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a6961d7..5e08f37 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5258,7 +5258,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_rcv_rtt_measure_ts(sk, skb);
- if ((int)skb->truesize > sk->sk_forward_alloc)
+ if ((int)skb->truesize > atomic_read(&sk->sk_forward_alloc))
goto step5;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4bd178a..dcc246a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -955,9 +955,7 @@ try_again:
err = ulen;
out_free:
- lock_sock(sk);
skb_free_datagram(sk, skb);
- release_sock(sk);
out:
return err;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 84b1a29..582b80a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -257,9 +257,7 @@ try_again:
err = ulen;
out_free:
- lock_sock(sk);
skb_free_datagram(sk, skb);
- release_sock(sk);
out:
return err;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 72cf86e..94d90b6 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -383,7 +383,7 @@ META_COLLECTOR(int_sk_wmem_queued)
META_COLLECTOR(int_sk_fwd_alloc)
{
SKIP_NONLOCAL(skb);
- dst->value = skb->sk->sk_forward_alloc;
+ dst->value = atomic_read(&skb->sk->sk_forward_alloc);
}
META_COLLECTOR(int_sk_sndbuf)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to
majordomo@vger.kernel.org
More majordomo info at
http://vger.kernel.org/majordomo-info.html