Make the expectation hash and the expectation count per-netns.
An expectation always belongs to the netns to which its master conntrack belongs.
This is natural and avoids bloating expectations.
Proc files and leaf users in protocol modules are stubbed to init_net,
this is temporary.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack_expect.h | 20 ++++--
include/net/netns/conntrack.h | 3 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 -
net/ipv4/netfilter/nf_nat_pptp.c | 2
net/netfilter/nf_conntrack_core.c | 8 +-
net/netfilter/nf_conntrack_expect.c | 53 ++++++++----------
net/netfilter/nf_conntrack_h323_main.c | 2
net/netfilter/nf_conntrack_helper.c | 2
net/netfilter/nf_conntrack_netlink.c | 12 ++--
net/netfilter/nf_conntrack_pptp.c | 4 -
net/netfilter/nf_conntrack_sip.c | 3 -
11 files changed, 61 insertions(+), 52 deletions(-)
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -6,7 +6,6 @@
#define _NF_CONNTRACK_EXPECT_H
#include <net/netfilter/nf_conntrack.h>
-extern struct hlist_head *nf_ct_expect_hash;
extern unsigned int nf_ct_expect_hsize;
extern unsigned int nf_ct_expect_max;
@@ -56,6 +55,15 @@ struct nf_conntrack_expect
struct rcu_head rcu;
};
+static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
+{
+#ifdef CONFIG_NET_NS
+ return exp->master->ct_net; /* by definition */
+#else
+ return &init_net;
+#endif
+}
+
struct nf_conntrack_expect_policy
{
unsigned int max_expected;
@@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy
#define NF_CT_EXPECT_PERMANENT 0x1
#define NF_CT_EXPECT_INACTIVE 0x2
-int nf_conntrack_expect_init(void);
-void nf_conntrack_expect_fini(void);
+int ...This one again introduces overly long lines, please fix up the remaining patches yourself and resend. I'll upload my current tree to kernel.org so you can use it as a base (will take a couple of minutes): git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6.git --
->help hook can run concurrently with itself, so iterating over SIP helpers
with static pointer can't work reliably.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 2f9bbc0..1fa306b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1193,7 +1193,6 @@ static const struct sip_handler sip_handlers[] = {
static int process_sip_response(struct sk_buff *skb,
const char **dptr, unsigned int *datalen)
{
- static const struct sip_handler *handler;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
unsigned int matchoff, matchlen;
@@ -1214,6 +1213,8 @@ static int process_sip_response(struct sk_buff *skb,
dataoff = matchoff + matchlen + 1;
for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+ const struct sip_handler *handler;
+
handler = &sip_handlers[i];
if (handler->response == NULL)
continue;
@@ -1228,13 +1229,14 @@ static int process_sip_response(struct sk_buff *skb,
static int process_sip_request(struct sk_buff *skb,
const char **dptr, unsigned int *datalen)
{
- static const struct sip_handler *handler;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
unsigned int matchoff, matchlen;
unsigned int cseq, i;
for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
+ const struct sip_handler *handler;
+
handler = &sip_handlers[i];
if (handler->request == NULL)
continue;
--
gre_keymap_list should be protected in all places.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 0e3d124..2752b74 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -97,10 +97,14 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
kmp = &help->help.ct_pptp_info.keymap[dir];
if (*kmp) {
/* check whether it's a retransmission */
+ read_lock_bh(&nf_ct_gre_lock);
list_for_each_entry(km, &gre_keymap_list, list) {
- if (gre_key_cmpfn(km, t) && km == *kmp)
+ if (gre_key_cmpfn(km, t) && km == *kmp) {
+ read_unlock_bh(&nf_ct_gre_lock);
return 0;
+ }
}
+ read_unlock_bh(&nf_ct_gre_lock);
pr_debug("trying to override keymap_%s for ct %p\n",
dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
return -EEXIST;
--
It does "kfree(list_head)", which looks wrong because the entity that was
allocated is definitely not a list_head.
However, this all works because the list_head is the first item in
struct nf_ct_gre_keymap.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 2752b74..c5a7822 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -45,12 +45,12 @@ static LIST_HEAD(gre_keymap_list);
void nf_ct_gre_keymap_flush(void)
{
- struct list_head *pos, *n;
+ struct nf_ct_gre_keymap *km, *tmp;
write_lock_bh(&nf_ct_gre_lock);
- list_for_each_safe(pos, n, &gre_keymap_list) {
- list_del(pos);
- kfree(pos);
+ list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) {
+ list_del(&km->list);
+ kfree(km);
}
write_unlock_bh(&nf_ct_gre_lock);
}
--
The first three patches are already in Linus' tree. --
Take netns from skb->dst->dev. It should be safe because they are called
from the LOCAL_OUT hook, where dst is valid (though I'm not exactly sure about
IPVS and queueing packets to userspace).
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f8edacd..9c54024 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -12,6 +12,7 @@
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
{
+ struct net *net = dev_net(skb->dst->dev);
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi fl = {};
@@ -19,7 +20,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
unsigned int hh_len;
unsigned int type;
- type = inet_addr_type(&init_net, iph->saddr);
+ type = inet_addr_type(net, iph->saddr);
if (addr_type == RTN_UNSPEC)
addr_type = type;
@@ -33,7 +34,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
fl.mark = skb->mark;
- if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+ if (ip_route_output_key(net, &rt, &fl) != 0)
return -1;
/* Drop old route. */
@@ -43,7 +44,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
fl.nl_u.ip4_u.daddr = iph->saddr;
- if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+ if (ip_route_output_key(net, &rt, &fl) != 0)
return -1;
odst = skb->dst;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8c6c5e7..4cb4844 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -23,7 +23,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
.saddr = iph->saddr, } },
};
- dst = ip6_route_output(&init_net, skb->sk, &fl);
+ dst = ...It's safe in all cases since they already expect to only get called when skb->dst is valid. Applied, thanks. --
What is confirmed connection in one netns can very well be unconfirmed
in another one.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 1c37356..b4b45c5 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -72,6 +72,5 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *proto);
extern spinlock_t nf_conntrack_lock ;
-extern struct hlist_head unconfirmed;
#endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index e453a33..6ddf58e 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -1,6 +1,7 @@
#ifndef __NETNS_CONNTRACK_H
#define __NETNS_CONNTRACK_H
+#include <linux/list.h>
#include <asm/atomic.h>
struct netns_ct {
@@ -8,6 +9,7 @@ struct netns_ct {
unsigned int expect_count;
struct hlist_head *hash;
struct hlist_head *expect_hash;
+ struct hlist_head unconfirmed;
int hash_vmalloc;
int expect_vmalloc;
};
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c188ede..2a105db 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -54,7 +54,6 @@ struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
unsigned int nf_ct_log_invalid __read_mostly;
-HLIST_HEAD(unconfirmed);
static struct kmem_cache *nf_conntrack_cachep __read_mostly;
DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
@@ -596,7 +595,8 @@ init_conntrack(struct net *net,
}
/* Overload tuple linked list to put us in unconfirmed list. */
- hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed);
+ hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
+ &net->ct.unconfirmed);
...It's deducible from skb->dev or skb->dst->dev, but we know netns at
the moment of call, so pass it down and use for finding and creating
conntracks.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index b4b45c5..e78afe7 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -20,7 +20,8 @@
/* This header is used to share core functionality between the
standalone connection tracking module, and the compatibility layer's use
of connection tracking. */
-extern unsigned int nf_conntrack_in(u_int8_t pf,
+extern unsigned int nf_conntrack_in(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
struct sk_buff *skb);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 03dd108..2e4dd3f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -172,7 +172,7 @@ static unsigned int ipv4_conntrack_in(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
}
static unsigned int ipv4_conntrack_local(unsigned int hooknum,
@@ -188,7 +188,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
}
- return nf_conntrack_in(PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 85050c0..e91db16 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -211,11 ...Make per-netns a) expectation hash and b) expectations count.
An expectation always belongs to the netns to which its master conntrack belongs.
This is natural and doesn't bloat the expectation.
Proc files and leaf users are stubbed to init_net, this is temporary.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 4c4d894..37a7fc1 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -6,7 +6,6 @@
#define _NF_CONNTRACK_EXPECT_H
#include <net/netfilter/nf_conntrack.h>
-extern struct hlist_head *nf_ct_expect_hash;
extern unsigned int nf_ct_expect_hsize;
extern unsigned int nf_ct_expect_max;
@@ -56,6 +55,15 @@ struct nf_conntrack_expect
struct rcu_head rcu;
};
+static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp)
+{
+#ifdef CONFIG_NET_NS
+ return exp->master->ct_net; /* by definition */
+#else
+ return &init_net;
+#endif
+}
+
struct nf_conntrack_expect_policy
{
unsigned int max_expected;
@@ -67,17 +75,17 @@ struct nf_conntrack_expect_policy
#define NF_CT_EXPECT_PERMANENT 0x1
#define NF_CT_EXPECT_INACTIVE 0x2
-int nf_conntrack_expect_init(void);
-void nf_conntrack_expect_fini(void);
+int nf_conntrack_expect_init(struct net *net);
+void nf_conntrack_expect_fini(struct net *net);
struct nf_conntrack_expect *
-__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple);
+__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
-nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple);
+nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
struct nf_conntrack_expect *
-nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple);
+nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple);
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
...I assume these messages are globally visible even with namespaces? Can we make this (and the corresponding ct hash message) refer to the namespace? Otherwise it might be a bit confusing. --
This is a somewhat peculiar situation. A netns doesn't have a unique ID like, say, ifindex. The only number related to a netns is its "struct net *". Namespaces can be distinguished by pointer value, but userspace, when it does clone(CLONE_NEWNET), obviously does not control that value and has no way to find it out after creation. And if we print it with "%p", net, a kernelspace pointer gets exposed, which is not nice. --
No, that also wouldn't solve the confusion. I guess conntrack is not the only subsystem which prints netns related messages, so how about adding a unique identifier that can be associated by userspace? --
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5456e4b..02eaf87 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -40,18 +40,20 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
EXPORT_SYMBOL_GPL(print_tuple);
struct ct_iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *ct_get_first(struct seq_file *seq)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket++) {
- n = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ n = rcu_dereference(net->ct.hash[st->bucket].first);
if (n)
return n;
}
@@ -61,13 +63,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
static struct hlist_node *ct_get_next(struct seq_file *seq,
struct hlist_node *head)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
- head = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ head = rcu_dereference(net->ct.hash[st->bucket].first);
}
return head;
}
@@ -177,7 +180,7 @@ static const struct seq_operations ct_seq_ops = {
static int ct_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ct_seq_ops,
+ return seq_open_net(inode, file, &ct_seq_ops,
sizeof(struct ct_iter_state));
}
@@ -186,7 +189,7 @@ static const struct file_operations ct_file_ops = {
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
@@ -277,38 ...Alexey Dobriyan wrote: Applied, thanks. --
Again, it's deducible from skb, but we're going to use it for
nf_conntrack_checksum and statistics, so just pass it from upper layer.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index d4376e9..97723d3 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -50,7 +50,7 @@ struct nf_conntrack_l4proto
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct nf_conn *ct);
- int (*error)(struct sk_buff *skb, unsigned int dataoff,
+ int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
u_int8_t pf, unsigned int hooknum);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index daf3463..8c7ed5b 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
-icmp_error_message(struct sk_buff *skb,
+icmp_error_message(struct net *net, struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
@@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb,
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(&init_net, &innertuple);
+ h = nf_conntrack_find_get(net, &innertuple);
if (!h) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
@@ -172,7 +172,7 @@ icmp_error_message(struct sk_buff *skb,
/* Small and modified version of icmp_rcv */
static int
-icmp_error(struct sk_buff *skb, unsigned int dataoff,
+icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
{
const struct icmphdr *icmph;
@@ -217,7 ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 5307316..6a09200 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -423,12 +423,13 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_related);
#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct net *net = &init_net;
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
struct hlist_node *n;
@@ -443,7 +444,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct hlist_node *head)
{
- struct net *net = &init_net;
+ struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(head->next);
@@ -524,7 +525,7 @@ static const struct seq_operations exp_seq_ops = {
static int exp_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &exp_seq_ops,
+ return seq_open_net(inode, file, &exp_seq_ops,
sizeof(struct ct_expect_iter_state));
}
@@ -533,26 +534,26 @@ static const struct file_operations exp_file_ops = {
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */
-static int exp_proc_init(void)
+static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc;
- proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops);
+ proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
if (!proc)
return -ENOMEM;
#endif /* CONFIG_PROC_FS */
return 0;
}
-static void exp_proc_remove(void)
+static void exp_proc_remove(struct net ...Alexey Dobriyan wrote: Applied, thanks. --
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f8636a5..b294083 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -21,18 +21,20 @@
#include <net/netfilter/nf_conntrack_acct.h>
struct ct_iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *ct_get_first(struct seq_file *seq)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
struct hlist_node *n;
for (st->bucket = 0;
st->bucket < nf_conntrack_htable_size;
st->bucket++) {
- n = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ n = rcu_dereference(net->ct.hash[st->bucket].first);
if (n)
return n;
}
@@ -42,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq)
static struct hlist_node *ct_get_next(struct seq_file *seq,
struct hlist_node *head)
{
+ struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(head->next);
while (head == NULL) {
if (++st->bucket >= nf_conntrack_htable_size)
return NULL;
- head = rcu_dereference(init_net.ct.hash[st->bucket].first);
+ head = rcu_dereference(net->ct.hash[st->bucket].first);
}
return head;
}
@@ -158,8 +161,8 @@ static const struct seq_operations ct_seq_ops = {
static int ct_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ct_seq_ops,
- sizeof(struct ct_iter_state));
+ return seq_open_net(inode, file, &ct_seq_ops,
+ sizeof(struct ct_iter_state));
}
static const struct file_operations ct_file_ops = {
@@ -167,17 +170,18 @@ static const struct file_operations ct_file_ops = {
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = ...Alexey Dobriyan wrote: Applied, thanks. --
Conntrack code will use it for a) removing expectations and helpers when corresponding module is removed, and b) removing conntracks when L3 protocol conntrack module is removed. Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7c52fe2..b0dc818 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,6 +18,7 @@ static struct list_head *first_device = &pernet_list; static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); +EXPORT_SYMBOL_GPL(net_namespace_list); struct net init_net; EXPORT_SYMBOL(init_net); --
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 920e778..9c06b9f 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -123,29 +123,18 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
+ struct net *net)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
const struct hlist_node *n, *next;
unsigned int i;
- mutex_lock(&nf_ct_helper_mutex);
- hlist_del_rcu(&me->hnode);
- nf_ct_helper_count--;
- mutex_unlock(&nf_ct_helper_mutex);
-
- /* Make sure every nothing is still using the helper unless its a
- * connection in the hash.
- */
- synchronize_rcu();
-
- spin_lock_bh(&nf_conntrack_lock);
-
/* Get rid of expectations */
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &init_net.ct.expect_hash[i], hnode) {
+ &net->ct.expect_hash[i], hnode) {
struct nf_conn_help *help = nfct_help(exp->master);
if ((help->helper == me || exp->helper == me) &&
del_timer(&exp->timeout)) {
@@ -156,12 +145,31 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
}
/* Get rid of expecteds, set helpers to NULL. */
- hlist_for_each_entry(h, n, &init_net.ct.unconfirmed, hnode)
+ hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode)
unhelp(h, me);
for (i = 0; i < nf_conntrack_htable_size; i++) {
- hlist_for_each_entry(h, n, &init_net.ct.hash[i], hnode)
+ hlist_for_each_entry(h, n, &net->ct.hash[i], hnode)
unhelp(h, me);
}
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+ struct net ...Alexey Dobriyan wrote: Applied, thanks. --
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 3a2f7ef..a59a307 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -207,6 +207,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
{
+ struct net *net;
+
BUG_ON(proto->l3proto >= AF_MAX);
mutex_lock(&nf_ct_proto_mutex);
@@ -219,7 +221,8 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(&init_net, kill_l3proto, proto);
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, kill_l3proto, proto);
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
@@ -316,6 +319,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
{
+ struct net *net;
+
BUG_ON(l4proto->l3proto >= PF_MAX);
mutex_lock(&nf_ct_proto_mutex);
@@ -328,7 +333,8 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(&init_net, kill_l4proto, l4proto);
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
--
Alexey Dobriyan wrote: Applied, thanks. --
This is cleaner, we already know conntrack to which event is relevant.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index f0b9078..c1b406c 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -28,10 +28,8 @@ extern void __nf_ct_event_cache_init(struct nf_conn *ct);
extern void nf_ct_event_cache_flush(void);
static inline void
-nf_conntrack_event_cache(enum ip_conntrack_events event,
- const struct sk_buff *skb)
+nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
{
- struct nf_conn *ct = (struct nf_conn *)skb->nfct;
struct nf_conntrack_ecache *ecache;
local_bh_disable();
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 8c7ed5b..205ba39 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct,
nf_ct_kill_acct(ct, ctinfo, skb);
} else {
atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
}
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 11976ea..7eed1fb 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -192,7 +192,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
ct, CTINFO2DIR(ctinfo));
- nf_conntrack_event_cache(IPCT_NATSEQADJ, skb);
+ nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
}
return 1;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index aabddfe..df04de9 100644
--- ...Heh, last minute proof-reading of this patch made me think,
that this is actually unneeded, simply because "ct" pointers will be
different for different conntracks in different netns, just like they
are different in one netns.
Not so sure anymore.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index c1b406c..35f814c 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -8,6 +8,7 @@
#include <linux/notifier.h>
#include <linux/interrupt.h>
+#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_expect.h>
#ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -15,9 +16,6 @@ struct nf_conntrack_ecache {
struct nf_conn *ct;
unsigned int events;
};
-DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
-
-#define CONNTRACK_ECACHE(x) (__get_cpu_var(nf_conntrack_ecache).x)
extern struct atomic_notifier_head nf_conntrack_chain;
extern int nf_conntrack_register_notifier(struct notifier_block *nb);
@@ -25,15 +23,16 @@ extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
extern void __nf_ct_event_cache_init(struct nf_conn *ct);
-extern void nf_ct_event_cache_flush(void);
+extern void nf_ct_event_cache_flush(struct net *net);
static inline void
nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache;
local_bh_disable();
- ecache = &__get_cpu_var(nf_conntrack_ecache);
+ ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
if (ct != ecache->ct)
__nf_ct_event_cache_init(ct);
ecache->events |= event;
@@ -58,6 +57,9 @@ nf_ct_expect_event(enum ip_conntrack_expect_events event,
atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp);
}
+extern int ...It's necessary because the cache needs to be flushed on netns exit
and this is only allowed while it's not in use anymore.
I don't see anything in this series actually making sure nothing
hits the cache on exit though. Am I missing something?
Additionally (I might have missed a following patch moving it
out though) this doesn't belong in the netns exit path:
void nf_conntrack_cleanup(struct net *net)
{
rcu_assign_pointer(ip_ct_attach, NULL);
...
rcu_assign_pointer(nf_ct_destroy, NULL);
--
When the netns refcount hits zero, netdevices in it will start dropping packets, and there is a synchronize_net() call before the cache flush. This is dealt with in 17/33. Have you got 18/33? Archives show it's missing. --
There are two patches labeled 17/33, I assume the second one is actually 18/33. --
Yes, that looks fine. Applied, thanks. BTW, doesn't __vlan_hwaccel_rx() also need a netns_alive() check to avoid passing packets to AF_PACKET sockets in dead namespaces? --
Add checks for init_net to not create kmem caches twice and so on.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b55944e..52d0663 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1016,7 +1016,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_flush);
supposed to kill the mall. */
void nf_conntrack_cleanup(struct net *net)
{
- rcu_assign_pointer(ip_ct_attach, NULL);
+ if (net_eq(net, &init_net))
+ rcu_assign_pointer(ip_ct_attach, NULL);
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
@@ -1035,16 +1036,21 @@ void nf_conntrack_cleanup(struct net *net)
while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
schedule();
- rcu_assign_pointer(nf_ct_destroy, NULL);
+ if (net_eq(net, &init_net)) {
+ rcu_assign_pointer(nf_ct_destroy, NULL);
- kmem_cache_destroy(nf_conntrack_cachep);
+ kmem_cache_destroy(nf_conntrack_cachep);
+ }
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
- nf_conntrack_acct_fini();
+ if (net_eq(net, &init_net))
+ nf_conntrack_acct_fini();
nf_conntrack_expect_fini(net);
- nf_conntrack_helper_fini();
- nf_conntrack_proto_fini();
+ if (net_eq(net, &init_net)) {
+ nf_conntrack_helper_fini();
+ nf_conntrack_proto_fini();
+ }
}
struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
@@ -1134,22 +1140,28 @@ int nf_conntrack_init(struct net *net)
int max_factor = 8;
int ret;
- /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
- * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
- if (!nf_conntrack_htable_size) {
- nf_conntrack_htable_size
- = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct hlist_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
- nf_conntrack_htable_size = 16384;
- if ...Having multiple of these net_eq checks per function (14 total) is not a very nice way to do this. How about splitting the code into a netns and a global part instead? --
Yep, I was just afraid of some subtle ordering rules and to keep [...] Probably they aren't strict at all. --
Not particularly. For cleanup a three stage approach with
1. init_net deactivation (ip_ct_attach = NULL)
2. generic netns cleanup
3. init_net specific final cleanup (slab cache, nf_conntrack_cachep,
accounting, helpers, protocols, ...)
should work fine.
The initialization should be OK with just an init_net part
and a generic netns part.
--
Ugh, I'm still finding the least ugly way to put init_net checks, and it's better to do it at the very end. So, slight reordering. See per-netns statistics, nf_conntrack_count, nf_conntrack_checksum, nf_conntrack_log_invalid and accounting. The rest (SIP, H323, GRE, PPTP, per-netns NAT) remains the same and can be applied independently of init_net checks. --
Ping! I've just sent a patch which adds init_net checks in a somewhat nicer way. Please review and apply the rest. --
I'll do that this week during the netfilter workshop. --
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack.h | 8 +-
include/net/netns/conntrack.h | 1
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 -
net/netfilter/nf_conntrack_core.c | 49 +++++++++---------
net/netfilter/nf_conntrack_expect.c | 4 -
net/netfilter/nf_conntrack_standalone.c | 4 -
6 files changed, 38 insertions(+), 32 deletions(-)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size;
extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
-DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
-#define NF_CT_STAT_INC_ATOMIC(count) \
+#define NF_CT_STAT_INC(net, count) \
+ (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+#define NF_CT_STAT_INC_ATOMIC(net, count) \
do { \
local_bh_disable(); \
- __get_cpu_var(nf_conntrack_stat).count++; \
+ per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++; \
local_bh_enable(); \
} while (0)
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -12,6 +12,7 @@ struct netns_ct {
struct hlist_head *hash;
struct hlist_head *expect_hash;
struct hlist_head unconfirmed;
+ struct ip_conntrack_stat *stat;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -294,7 +294,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
@@ -308,7 +308,7 @@ static void ...Show correct conntrack count, while I'm at it.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 +++++++++-----
net/netfilter/nf_conntrack_standalone.c | 14 +++++++++-----
2 files changed, 18 insertions(+), 10 deletions(-)
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -285,6 +285,7 @@ static const struct file_operations ip_exp_file_ops = {
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
if (*pos == 0)
@@ -294,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -302,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -320,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
- unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
@@ -360,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_cpu_seq_ops);
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct ...Applied. --
Note, sysctl table is always duplicated, this is simpler and less
special-cased.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netns/conntrack.h | 4 +
net/netfilter/nf_conntrack_standalone.c | 73 +++++++++++++++++---------------
2 files changed, 45 insertions(+), 32 deletions(-)
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <asm/atomic.h>
+struct ctl_table_header;
struct nf_conntrack_ecache;
struct netns_ct {
@@ -16,6 +17,9 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_header;
+#endif
int hash_vmalloc;
int expect_vmalloc;
};
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -330,7 +330,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-static struct ctl_table_header *nf_ct_sysctl_header;
static struct ctl_table_header *nf_ct_netfilter_header;
static ctl_table nf_ct_sysctl_table[] = {
@@ -409,40 +408,58 @@ static struct ctl_path nf_ct_path[] = {
EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
- nf_ct_netfilter_header =
- register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
- if (!nf_ct_netfilter_header)
- goto out;
-
- nf_ct_sysctl_header =
- register_sysctl_paths(nf_net_netfilter_sysctl_path,
- nf_ct_sysctl_table);
- if (!nf_ct_sysctl_header)
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net)) {
+ nf_ct_netfilter_header =
+ register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
+ if (!nf_ct_netfilter_header)
+ goto out;
+ }
+
+ table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
+ GFP_KERNEL);
+ if ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack.h | 1 -
include/net/netns/conntrack.h | 1 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +-
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
net/netfilter/nf_conntrack_proto_dccp.c | 2 +-
net/netfilter/nf_conntrack_proto_tcp.c | 2 +-
net/netfilter/nf_conntrack_proto_udp.c | 2 +-
net/netfilter/nf_conntrack_proto_udplite.c | 2 +-
net/netfilter/nf_conntrack_standalone.c | 7 +++----
10 files changed, 11 insertions(+), 12 deletions(-)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb)
extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
-extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
#define NF_CT_STAT_INC(net, count) \
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,6 +17,7 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_checksum;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
#endif
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -270,7 +270,7 @@ static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
.procname = "ip_conntrack_checksum",
- .data = &nf_conntrack_checksum,
+ .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -188,7 +188,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack_l4proto.h | 15 +++++++--------
include/net/netns/conntrack.h | 1 +
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 +++---
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +-
net/netfilter/nf_conntrack_core.c | 1 -
net/netfilter/nf_conntrack_proto_dccp.c | 10 ++++++----
net/netfilter/nf_conntrack_proto_tcp.c | 18 ++++++++++--------
net/netfilter/nf_conntrack_proto_udp.c | 6 +++---
net/netfilter/nf_conntrack_proto_udplite.c | 8 ++++----
net/netfilter/nf_conntrack_standalone.c | 6 +++---
11 files changed, 39 insertions(+), 36 deletions(-)
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
extern const struct nla_policy nf_ct_port_nla_policy[];
-/* Log invalid packets */
-extern unsigned int nf_ct_log_invalid;
-
#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
-#define LOG_INVALID(proto) \
- (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW)
+#define LOG_INVALID(net, proto) \
+ ((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW)
#else
-#define LOG_INVALID(proto) \
- ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \
+#define LOG_INVALID(net, proto) \
+ (((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#else
-#define LOG_INVALID(proto) 0
+#define LOG_INVALID(net, proto) 0
#endif /* CONFIG_SYSCTL */
#endif /*_NF_CONNTRACK_PROTOCOL_H*/
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -18,6 +18,7 @@ struct netns_ct {
struct nf_conntrack_ecache ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/net/netfilter/nf_conntrack_acct.h | 10 +--
include/net/netns/conntrack.h | 2
net/netfilter/nf_conntrack_acct.c | 100 +++++++++++++++++++++---------
net/netfilter/nf_conntrack_core.c | 4 -
4 files changed, 81 insertions(+), 35 deletions(-)
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -8,6 +8,7 @@
#ifndef _NF_CONNTRACK_ACCT_H
#define _NF_CONNTRACK_ACCT_H
+#include <net/net_namespace.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack.h>
@@ -18,8 +19,6 @@ struct nf_conn_counter {
u_int64_t bytes;
};
-extern int nf_ct_acct;
-
static inline
struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
{
@@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
static inline
struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conn_counter *acct;
- if (!nf_ct_acct)
+ if (!net->ct.sysctl_acct)
return NULL;
acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp);
@@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
extern unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
-extern int nf_conntrack_acct_init(void);
-extern void nf_conntrack_acct_fini(void);
+extern int nf_conntrack_acct_init(struct net *net);
+extern void nf_conntrack_acct_fini(struct net *net);
#endif /* _NF_CONNTRACK_ACCT_H */
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,10 +17,12 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
...Add init_net checks to not remove kmem_caches twice and so on.
Refactor functions to split code which should be executed only for
init_net into one place.
ip_ct_attach and nf_ct_destroy assignments remain separate, because
they're separate stages in setup and teardown.
NOTE: NOTRACK code is in for-every-net part. It will be made per-netns
after we decide how to do it correctly.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
net/netfilter/nf_conntrack_core.c | 151 +++++++++++++++++++++++-------------
net/netfilter/nf_conntrack_expect.c | 26 +++---
2 files changed, 114 insertions(+), 63 deletions(-)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1010,17 +1010,15 @@ void nf_conntrack_flush(struct net *net)
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush);
-/* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
-void nf_conntrack_cleanup(struct net *net)
+static void nf_conntrack_cleanup_init_net(void)
{
- rcu_assign_pointer(ip_ct_attach, NULL);
-
- /* This makes sure all current packets have passed through
- netfilter framework. Roll on, two-stage module
- delete... */
- synchronize_net();
+ nf_conntrack_helper_fini();
+ nf_conntrack_proto_fini();
+ kmem_cache_destroy(nf_conntrack_cachep);
+}
+static void nf_conntrack_cleanup_net(struct net *net)
+{
nf_ct_event_cache_flush(net);
nf_conntrack_ecache_fini(net);
i_see_dead_people:
@@ -1033,17 +1031,31 @@ void nf_conntrack_cleanup(struct net *net)
while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
schedule();
- rcu_assign_pointer(nf_ct_destroy, NULL);
-
- kmem_cache_destroy(nf_conntrack_cachep);
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
-
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
free_percpu(net->ct.stat);
- nf_conntrack_helper_fini();
- nf_conntrack_proto_fini();
+}
+
+/* Mishearing the voices ...Show correct conntrack count while I'm at it.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index fdc85b3..313ebf0 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -285,6 +285,7 @@ static const struct file_operations ip_exp_file_ops = {
static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
if (*pos == 0)
@@ -294,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -302,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
int cpu;
for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return per_cpu_ptr(init_net.ct.stat, cpu);
+ return per_cpu_ptr(net->ct.stat, cpu);
}
return NULL;
@@ -320,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
- unsigned int nr_conntracks = atomic_read(&init_net.ct.count);
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
@@ -360,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = {
static int ct_cpu_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ct_cpu_seq_ops);
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ...For now just counted separately, not shown.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f5447f1..c955610 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,12 +290,12 @@ extern unsigned int nf_conntrack_htable_size;
extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
-DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
-#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++)
-#define NF_CT_STAT_INC_ATOMIC(count) \
+#define NF_CT_STAT_INC(net, count) \
+ (per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+#define NF_CT_STAT_INC_ATOMIC(net, count) \
do { \
local_bh_disable(); \
- __get_cpu_var(nf_conntrack_stat).count++; \
+ per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++; \
local_bh_enable(); \
} while (0)
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 9d5c162..fc0a46d 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -12,6 +12,7 @@ struct netns_ct {
struct hlist_head *hash;
struct hlist_head *expect_hash;
struct hlist_head unconfirmed;
+ struct ip_conntrack_stat *stat;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index b294083..fdc85b3 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -294,7 +294,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
if (!cpu_possible(cpu))
continue;
*pos = cpu+1;
- return &per_cpu(nf_conntrack_stat, cpu);
+ return per_cpu_ptr(init_net.ct.stat, cpu);
}
return NULL;
@@ -308,7 +308,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, ...Note, sysctl table is always duplicated, this is simpler, less special-cased
and less error-prone (I made one mistake in the first version of this patch).
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index fc0a46d..2b50758 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <asm/atomic.h>
+struct ctl_table_header;
struct nf_conntrack_ecache;
struct netns_ct {
@@ -16,6 +17,9 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+#ifdef CONFIG_SYSCTL
+ struct ctl_table_header *sysctl_header;
+#endif
int hash_vmalloc;
int expect_vmalloc;
};
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 169760d..64b4f95 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -330,7 +330,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_checksum);
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-static struct ctl_table_header *nf_ct_sysctl_header;
static struct ctl_table_header *nf_ct_netfilter_header;
static ctl_table nf_ct_sysctl_table[] = {
@@ -409,40 +408,58 @@ static struct ctl_path nf_ct_path[] = {
EXPORT_SYMBOL_GPL(nf_ct_log_invalid);
-static int nf_conntrack_standalone_init_sysctl(void)
+static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
- nf_ct_netfilter_header =
- register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
- if (!nf_ct_netfilter_header)
- goto out;
-
- nf_ct_sysctl_header =
- register_sysctl_paths(nf_net_netfilter_sysctl_path,
- nf_ct_sysctl_table);
- if (!nf_ct_sysctl_header)
+ struct ctl_table *table;
+
+ if (net_eq(net, &init_net)) {
+ nf_ct_netfilter_header =
+ register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table);
+ if (!nf_ct_netfilter_header)
+ goto ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index c955610..b76a868 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -287,7 +287,6 @@ static inline int nf_ct_is_untracked(const struct sk_buff *skb)
extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
-extern int nf_conntrack_checksum;
extern int nf_conntrack_max;
#define NF_CT_STAT_INC(net, count) \
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 2b50758..38b6dae 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,6 +17,7 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_checksum;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2e4dd3f..75871b1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -270,7 +270,7 @@ static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
.procname = "ip_conntrack_checksum",
- .data = &nf_conntrack_checksum,
+ .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 205ba39..ace66cb 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -188,7 +188,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
}
/* See ip_conntrack_proto_tcp.c */
- if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ if (net->ct.sysctl_checksum && ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 97723d3..7f2f43c 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -117,20 +117,19 @@ extern int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
extern const struct nla_policy nf_ct_port_nla_policy[];
-/* Log invalid packets */
-extern unsigned int nf_ct_log_invalid;
-
#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
-#define LOG_INVALID(proto) \
- (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW)
+#define LOG_INVALID(net, proto) \
+ ((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW)
#else
-#define LOG_INVALID(proto) \
- ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \
+#define LOG_INVALID(net, proto) \
+ (((net)->ct.sysctl_log_invalid == (proto) || \
+ (net)->ct.sysctl_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
#else
-#define LOG_INVALID(proto) 0
+#define LOG_INVALID(net, proto) 0
#endif /* CONFIG_SYSCTL */
#endif /*_NF_CONNTRACK_PROTOCOL_H*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 38b6dae..503e375 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -18,6 +18,7 @@ struct netns_ct {
struct nf_conntrack_ecache *ecache;
#endif
int sysctl_checksum;
+ unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 75871b1..af69acc 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -278,7 +278,7 @@ static ctl_table ip_ct_sysctl_table[] = {
{
.ctl_name = ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index a006080..6813f1c 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -736,6 +736,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct net *net = nf_ct_net(ct);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
union nf_inet_addr *saddr;
struct nf_conntrack_tuple tuple;
@@ -775,7 +776,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
rcu_read_lock();
do {
- exp = __nf_ct_expect_find(&init_net, &tuple);
+ exp = __nf_ct_expect_find(net, &tuple);
if (!exp || exp->master == ct ||
nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
--
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 5d5ae55..03e218f 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -8,6 +8,7 @@
#ifndef _NF_CONNTRACK_ACCT_H
#define _NF_CONNTRACK_ACCT_H
+#include <net/net_namespace.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack.h>
@@ -18,8 +19,6 @@ struct nf_conn_counter {
u_int64_t bytes;
};
-extern int nf_ct_acct;
-
static inline
struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
{
@@ -29,9 +28,10 @@ struct nf_conn_counter *nf_conn_acct_find(const struct nf_conn *ct)
static inline
struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conn_counter *acct;
- if (!nf_ct_acct)
+ if (!net->ct.sysctl_acct)
return NULL;
acct = nf_ct_ext_add(ct, NF_CT_EXT_ACCT, gfp);
@@ -45,7 +45,7 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
extern unsigned int
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
-extern int nf_conntrack_acct_init(void);
-extern void nf_conntrack_acct_fini(void);
+extern int nf_conntrack_acct_init(struct net *net);
+extern void nf_conntrack_acct_fini(struct net *net);
#endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 503e375..f4498a6 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -17,10 +17,12 @@ struct netns_ct {
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
+ int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
+ struct ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index dfb826c..c1504f7 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1210,6 +1210,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
union nf_inet_addr *addr,
__be16 port)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
@@ -1219,7 +1220,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
tuple.dst.u.tcp.port = port;
tuple.dst.protonum = IPPROTO_TCP;
- exp = __nf_ct_expect_find(&init_net, &tuple);
+ exp = __nf_ct_expect_find(net, &tuple);
if (exp && exp->master == ct)
return exp;
return NULL;
--
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index e47d5de..373e51e 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -98,6 +98,7 @@ EXPORT_SYMBOL(pptp_msg_name);
static void pptp_expectfn(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ struct net *net = nf_ct_net(ct);
typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn;
pr_debug("increasing timeouts\n");
@@ -121,7 +122,7 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple(&inv_t);
- exp_other = nf_ct_expect_find_get(&init_net, &inv_t);
+ exp_other = nf_ct_expect_find_get(net, &inv_t);
if (exp_other) {
/* delete other expectation. */
pr_debug("found\n");
@@ -134,7 +135,8 @@ static void pptp_expectfn(struct nf_conn *ct,
rcu_read_unlock();
}
-static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
+static int destroy_sibling_or_exp(struct net *net,
+ const struct nf_conntrack_tuple *t)
{
const struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
@@ -143,7 +145,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
pr_debug("trying to timeout ct or exp for tuple ");
nf_ct_dump_tuple(t);
- h = nf_conntrack_find_get(&init_net, t);
+ h = nf_conntrack_find_get(net, t);
if (h) {
sibling = nf_ct_tuplehash_to_ctrack(h);
pr_debug("setting timeout of conntrack %p to 0\n", sibling);
@@ -154,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
nf_ct_put(sibling);
return 1;
} else {
- exp = nf_ct_expect_find_get(&init_net, t);
+ exp = nf_ct_expect_find_get(net, t);
if (exp) {
pr_debug("unexpect_related of expect %p\n", exp);
nf_ct_unexpect_related(exp);
@@ -168,6 +170,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t)
/* timeout GRE data ...* make keymap list per-netns
* per-netns keymap lock (not strictly necessary)
* flush keymap at netns stop and module unload.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 535e421..2a10efd 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,7 +87,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
/* delete keymap entries */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
-extern void nf_ct_gre_keymap_flush(void);
+extern void nf_ct_gre_keymap_flush(struct net *net);
extern void nf_nat_need_gre(void);
#endif /* __KERNEL__ */
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5db7df5..e47d5de 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -602,7 +602,7 @@ static int __init nf_conntrack_pptp_init(void)
static void __exit nf_conntrack_pptp_fini(void)
{
nf_conntrack_helper_unregister(&pptp);
- nf_ct_gre_keymap_flush();
+ nf_ct_gre_keymap_flush(&init_net);
}
module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 5b1273a..a2cdbcb 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -29,8 +29,11 @@
#include <linux/list.h>
#include <linux/seq_file.h>
#include <linux/in.h>
+#include <linux/netdevice.h>
#include <linux/skbuff.h>
-
+#include <net/dst.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -40,19 +43,23 @@
#define GRE_TIMEOUT (30 * HZ)
#define GRE_STREAM_TIMEOUT (180 * HZ)
-static DEFINE_RWLOCK(nf_ct_gre_lock);
-static ...First, allow entry in notifier hook.
Second, start conntrack cleanup in netns to which netdevice belongs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5e1c817..65c811b 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -119,9 +119,7 @@ static int masq_device_event(struct notifier_block *this,
void *ptr)
{
const struct net_device *dev = ptr;
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
+ struct net *net = dev_net(dev);
if (event == NETDEV_DOWN) {
/* Device was downed. Search entire table for
@@ -129,7 +127,7 @@ static int masq_device_event(struct notifier_block *this,
and forget them. */
NF_CT_ASSERT(dev->ifindex != 0);
- nf_ct_iterate_cleanup(&init_net, device_cmp,
+ nf_ct_iterate_cleanup(net, device_cmp,
(void *)(long)dev->ifindex);
}
--
Same story as with the iptable_filter and iptable_raw tables.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a6ed838..b286b84 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -38,6 +38,7 @@ struct netns_ipv4 {
struct xt_table *iptable_raw;
struct xt_table *arptable_filter;
struct xt_table *iptable_security;
+ struct xt_table *nat_table;
#endif
int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index e8b4d0d..0a02a8c 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -33,7 +33,7 @@ static struct
struct ipt_replace repl;
struct ipt_standard entries[3];
struct ipt_error term;
-} nat_initial_table __initdata = {
+} nat_initial_table __net_initdata = {
.repl = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
@@ -58,14 +58,13 @@ static struct
.term = IPT_ERROR_INIT, /* ERROR */
};
-static struct xt_table __nat_table = {
+static struct xt_table nat_table = {
.name = "nat",
.valid_hooks = NAT_VALID_HOOKS,
.lock = __RW_LOCK_UNLOCKED(__nat_table.lock),
.me = THIS_MODULE,
.af = AF_INET,
};
-static struct xt_table *nat_table;
/* Source NAT */
static unsigned int ipt_snat_target(struct sk_buff *skb,
@@ -194,9 +193,10 @@ int nf_nat_rule_find(struct sk_buff *skb,
const struct net_device *out,
struct nf_conn *ct)
{
+ struct net *net = nf_ct_net(ct);
int ret;
- ret = ipt_do_table(skb, hooknum, in, out, nat_table);
+ ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
if (ret == NF_ACCEPT) {
if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -226,14 +226,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
.family = AF_INET,
};
+static int __net_init nf_nat_rule_net_init(struct net *net)
+{
+ net->ipv4.nat_table = ipt_register_table(net, &nat_table,
+ ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b286b84..ece1c92 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -39,6 +39,8 @@ struct netns_ipv4 {
struct xt_table *arptable_filter;
struct xt_table *iptable_security;
struct xt_table *nat_table;
+ struct hlist_head *nat_bysource;
+ int nat_vmalloced;
#endif
int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 5d4a5b7..2ac9eaf 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly;
/* Calculated at init based on memory size */
static unsigned int nf_nat_htable_size __read_mostly;
-static int nf_nat_vmalloced;
-
-static struct hlist_head *bysource __read_mostly;
#define MAX_IP_NAT_PROTO 256
static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
@@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct,
/* Only called for SRC manip */
static int
-find_appropriate_src(const struct nf_conntrack_tuple *tuple,
+find_appropriate_src(struct net *net,
+ const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
@@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
const struct hlist_node *n;
rcu_read_lock();
- hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
+ hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple)) {
/* Copy source part from reply tuple. */
@@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
const struct nf_nat_protocol *proto;
/* 1) If this srcip/proto/src-proto-part is currently ...Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 0a02a8c..f929352 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -91,13 +91,13 @@ static unsigned int ipt_snat_target(struct sk_buff *skb,
}
/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
+static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip)
{
static int warned = 0;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
struct rtable *rt;
- if (ip_route_output_key(&init_net, &rt, &fl) != 0)
+ if (ip_route_output_key(net, &rt, &fl) != 0)
return;
if (rt->rt_src != srcip && !warned) {
@@ -130,7 +130,7 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb,
if (hooknum == NF_INET_LOCAL_OUT &&
mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle(ip_hdr(skb)->daddr,
+ warn_if_extra_mangle(dev_net(out), ip_hdr(skb)->daddr,
mr->range[0].min_ip);
return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
--
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index e4bdddc..9eb1710 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp");
static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ struct net *net = nf_ct_net(ct);
const struct nf_conn *master = ct->master;
struct nf_conntrack_expect *other_exp;
struct nf_conntrack_tuple t;
@@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple_ip(&t);
- other_exp = nf_ct_expect_find_get(&init_net, &t);
+ other_exp = nf_ct_expect_find_get(net, &t);
if (other_exp) {
nf_ct_unexpect_related(other_exp);
nf_ct_expect_put(other_exp);
--
From the kernel's perspective, allow entry into nf_hook_slow() from any netns.
Stuff which uses nf_register_hook/nf_register_hooks but is otherwise not netns-ready:
DECnet netfilter
ipt_CLUSTERIP
nf_nat_standalone.c together with XFRM (?)
IPVS
several individual match modules (like hashlimit)
ctnetlink
NOTRACK
all sorts of queueing and reporting to userspace
L3 and L4 protocol sysctls, bridge sysctls
probably something else
Anyway, critical mass has been achieved; there is no reason to hide netfilter any longer.
From the userspace perspective, allow manipulating all sorts of
iptables/ip6tables/arptables rules.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b16cd79..a90ac83 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -165,14 +165,6 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
unsigned int verdict;
int ret = 0;
-#ifdef CONFIG_NET_NS
- struct net *net;
-
- net = indev == NULL ? dev_net(outdev) : dev_net(indev);
- if (net != &init_net)
- return 1;
-#endif
-
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index f9b46de..8ab829f 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -65,9 +65,6 @@ static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf,
{
struct nf_sockopt_ops *ops;
- if (!net_eq(sock_net(sk), &init_net))
- return ERR_PTR(-ENOPROTOOPT);
-
if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0)
return ERR_PTR(-EINTR);
--
Applied, thanks Alexey. Is there an easy way to test all this stuff? --
I used the following: 0) netns is currently mutually exclusive with sysfs, so, depending on the sanity of the distro initscripts, booting a sysfs-less kernel can be tricky. In Gentoo, for example, a) rm -rf /sys (sic!), b) RC_USE_FSTAB="yes", c) RC_DEVICES="static" in /etc/conf.d/rc are needed. 1) netns creation tool (attached, some container guy posted it somewhere): # ns_exec -n /bin/sh 2) shut down the network in init_net: sudo /etc/init.d/ntpd stop; sudo /etc/init.d/sshd stop; sudo /etc/init.d/iptables stop; sudo /etc/init.d/ip6tables stop; sudo /etc/init.d/net.eth1 stop; sudo /etc/init.d/net.eth0 stop 3) move the netdevices to the netns: ip l s dev eth0 netns "$1"; ip l s dev eth1 netns "$1", where $1 is the PID of the shell from 1) 4) in the netns, start everything back up: sudo /etc/init.d/net.eth0 start; sudo /etc/init.d/net.eth1 start; sudo /etc/init.d/iptables start; sudo /etc/init.d/ip6tables start; sudo /etc/init.d/sshd start; sudo /etc/init.d/ntpd start 5) at this point my usual NAT setup is back working for me, and everything should be like in init_net (modulo the aforementioned exceptions) and independent from init_net. Leaked netns are in /proc/slabinfo under "net_namespace". Some IPv6 printks can be annoying, so mute them. Object poisoning with SLUB won't work for irrelevant reasons, so use SLAB. Something like that. --
