From: Jamal Hadi Salim <hadi@cyberus.ca>
This patchset implements manipulation of the IPsec tables (SAD and SPD)
using the "mark" tag. You can configure SAD and SPD entries from user
space with a specified mark.
Example:
---
#ip xfrm state add src 192.168.2.100 dst 192.168.1.10 proto esp spi 0x00000301 mode tunnel mark 7 auth md5 0x96358c90783bbfa3d7b196ceabe0536b enc des3_ede 0xf6ddb555acfd9d77b03ea3843f2653255afe8eb5573965df
#ip xfrm state get mark 7 src 192.168.2.100 dst 192.168.1.10 proto esp spi 0x00000301
src 192.168.2.100 dst 192.168.1.10
proto esp spi 0x00000301 reqid 0 mode tunnel
replay-window 0
mark 7/0xffffffff
auth hmac(md5) 0x96358c90783bbfa3d7b196ceabe0536b
enc cbc(des3_ede) 0xf6ddb555acfd9d77b03ea3843f2653255afe8eb5573965df
sel src 0.0.0.0/0 dst 0.0.0.0/0
#
#
#ip xfrm policy add src 172.16.2.0/24 dst 172.16.1.0/24 \
dir fwd ptype main \
tmpl src 192.168.2.100 dst 192.168.1.100 \
proto esp mode tunnel mark 7 mask 0xffffffff
#
#ip xfrm policy ls
src 172.16.2.0/24 dst 172.16.1.0/24
dir fwd priority 0 ptype main
mark 7/0xffffffff
tmpl src 192.168.2.100 dst 192.168.1.100
proto esp reqid 0 mode tunnel
-----
A mark-configured SAD/SPD entry will use the mark as part of the
lookup key (in both the data path and the control path).
Example (the lookup without a mark fails; the lookup with mark 7 finds the policy):
---
# ip xfrm pol get src 172.16.2.0/24 dst 172.16.1.0/24 dir fwd
RTNETLINK answers: No such file or directory
# ip xfrm pol get src 172.16.2.0/24 dst 172.16.1.0/24 dir fwd mark 7
src 172.16.2.0/24 dst 172.16.1.0/24
dir fwd priority 0 ptype main
mark 7/0xffffffff
tmpl src 192.168.2.100 dst 192.168.1.100
proto esp reqid 0 mode tunnel
---
I could probably have broken down the last two patches into more
than one, but it was easier to do it this way. If there is a strong
feeling that I should do that, let me know.
Oh, and this is an attempt to use git send-email - so I would appreciate
any feedback on what I can ...
From: Jamal Hadi Salim <hadi@cyberus.ca>
Add basic structuring and accessors for xfrm mark
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
---
include/linux/xfrm.h | 12 +++++++++---
include/net/xfrm.h | 28 ++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 29e04be..887c533 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -267,8 +267,8 @@ enum xfrm_attr_type_t {
XFRMA_ALG_COMP, /* struct xfrm_algo */
XFRMA_ENCAP, /* struct xfrm_algo + struct xfrm_encap_tmpl */
XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */
- XFRMA_SA,
- XFRMA_POLICY,
+ XFRMA_SA, /* struct xfrm_usersa_info */
+ XFRMA_POLICY, /*struct xfrm_userpolicy_info */
XFRMA_SEC_CTX, /* struct xfrm_sec_ctx */
XFRMA_LTIME_VAL,
XFRMA_REPLAY_VAL,
@@ -276,17 +276,23 @@ enum xfrm_attr_type_t {
XFRMA_ETIMER_THRESH,
XFRMA_SRCADDR, /* xfrm_address_t */
XFRMA_COADDR, /* xfrm_address_t */
- XFRMA_LASTUSED,
+ XFRMA_LASTUSED, /* unsigned long */
XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */
XFRMA_MIGRATE,
XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */
XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */
XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */
+ XFRMA_MARK, /* u32 */
__XFRMA_MAX
#define XFRMA_MAX (__XFRMA_MAX - 1)
};
+struct xfrm_umark {
+ __u32 v; /* value */
+ __u32 m; /* mask */
+};
+
enum xfrm_sadattr_type_t {
XFRMA_SAD_UNSPEC,
XFRMA_SAD_CNT,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0beb413..904527f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -123,6 +123,11 @@ struct xfrm_state_walk {
u32 seq;
};
+struct xfrm_kmark {
+ u32 v; /* value */
+ u32 m; /* mask */
+};
+
/* Full description of state of transformer. */
struct xfrm_state {
#ifdef CONFIG_NET_NS
@@ -140,6 +145,7 @@ struct xfrm_state {
struct xfrm_id id;
struct ...From: Jamal Hadi Salim <hadi@cyberus.ca>
Allow mark to be used when doing SP lookup
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
---
net/xfrm/xfrm_policy.c | 12 +++++++++++-
1 files changed, 11 insertions(+), 1 deletions(-)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e804aa5..38f40c4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -556,6 +556,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct hlist_head *chain;
struct hlist_node *entry, *newpos;
struct dst_entry *gc_list;
+ u32 mark = policy->mark.v & policy->mark.m;
write_lock_bh(&xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
@@ -564,6 +565,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
hlist_for_each_entry(pol, entry, chain, bydst) {
if (pol->type == policy->type &&
!selector_cmp(&pol->selector, &policy->selector) &&
+ (mark & pol->mark.m) == pol->mark.v &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
if (excl) {
@@ -650,6 +652,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
ret = NULL;
hlist_for_each_entry(pol, entry, chain, bydst) {
if (pol->type == type &&
+ (mark & pol->mark.m) == pol->mark.v &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security)) {
xfrm_pol_hold(pol);
@@ -692,7 +695,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
chain = net->xfrm.policy_byidx + idx_hash(net, id);
ret = NULL;
hlist_for_each_entry(pol, entry, chain, byidx) {
- if (pol->type == type && pol->index == id) {
+ if (pol->type == type && pol->index == id &&
+ (mark & pol->mark.m) == pol->mark.v) {
xfrm_pol_hold(pol);
if (delete) {
*err = security_xfrm_policy_delete(
@@ -909,6 +913,7 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi ...From: Jamal Hadi Salim <hadi@cyberus.ca>
Add the ability for netlink userspace to manipulate the SPD with a
mark: set the mark, retrieve it, and get events carrying a defined
mark, etc.
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
---
net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++------
1 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 57b38a3..17a7d51 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -31,8 +31,6 @@
#include <linux/in6.h>
#endif
-#define DUMMY_MARK 0
-
static inline int aead_len(struct xfrm_algo_aead *alg)
{
return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
@@ -1234,6 +1232,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
if (err)
goto error;
+ xfrm_mark_get(attrs, &xp->mark);
+
return xp;
error:
*errp = err;
@@ -1380,10 +1380,13 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
goto nlmsg_failure;
if (copy_to_user_policy_type(xp->type, skb) < 0)
goto nlmsg_failure;
+ if (xfrm_mark_put(skb, &xp->mark))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
+nla_put_failure:
nlmsg_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -1455,6 +1458,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
struct km_event c;
int delete;
+ struct xfrm_kmark m;
+ u32 mark = xfrm_mark_get(attrs, &m);
p = nlmsg_data(nlh);
delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
@@ -1468,7 +1473,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
if (p->index)
- xp = xfrm_policy_byid(net, DUMMY_MARK, type, p->dir, p->index, delete, &err);
+ xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@@ -1485,7 +1490,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct ...From: Jamal Hadi Salim <hadi@cyberus.ca>
Add the ability for netlink userspace to manipulate the SAD with a
mark: set the mark, retrieve it, and get events carrying a defined
mark.
MIGRATE support may be added later.
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
---
net/xfrm/xfrm_user.c | 72 +++++++++++++++++++++++++++++++++++++++----------
1 files changed, 57 insertions(+), 15 deletions(-)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6abe4c0..57b38a3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -32,7 +32,6 @@
#endif
#define DUMMY_MARK 0
-static struct xfrm_kmark dummy_mark = {0, 0};
static inline int aead_len(struct xfrm_algo_aead *alg)
{
@@ -449,6 +448,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
+ xfrm_mark_get(attrs, &x->mark);
+
err = xfrm_init_state(x);
if (err)
goto error;
@@ -529,11 +530,13 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net,
int *errp)
{
struct xfrm_state *x = NULL;
+ struct xfrm_kmark m;
int err;
+ u32 mark = xfrm_mark_get(attrs, &m);
if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) {
err = -ESRCH;
- x = xfrm_state_lookup(net, DUMMY_MARK, &p->daddr, p->spi, p->proto, p->family);
+ x = xfrm_state_lookup(net, mark, &p->daddr, p->spi, p->proto, p->family);
} else {
xfrm_address_t *saddr = NULL;
@@ -544,7 +547,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net,
}
err = -ESRCH;
- x = xfrm_state_lookup_byaddr(net, DUMMY_MARK, &p->daddr, saddr,
+ x = xfrm_state_lookup_byaddr(net, mark,
+ &p->daddr, saddr,
p->proto, p->family);
}
@@ -686,6 +690,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (x->encap)
NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
+ if (xfrm_mark_put(skb, &x->mark))
+ goto nla_put_failure;
+
if (x->security && copy_sec_ctx(x->security, skb) < 0)
goto nla_put_failure;
@@ -950,6 ...From: Jamal Hadi Salim <hadi@cyberus.ca> pass mark to all SP lookups to prepare them for when we add code to have them search. Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca> --- include/net/xfrm.h | 5 +++-- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_policy.c | 8 ++++---- net/xfrm/xfrm_user.c | 10 +++++----- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 96b6953..676f71b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1456,11 +1456,12 @@ extern int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, + u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u8, int dir, u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); diff --git a/net/key/af_key.c b/net/key/af_key.c index 4d652c8..31c87df 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2348,7 +2348,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(net, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2596,7 +2596,7 @@ ...
From: Jamal Hadi Salim <hadi@cyberus.ca>
Allow mark to be added to the SA lookup
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
---
net/xfrm/xfrm_state.c | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index fcf0d9f..151c6d6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -678,6 +678,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad
xfrm_addr_cmp(&x->id.daddr, daddr, family))
continue;
+ if ((mark & x->mark.m) != x->mark.v)
+ continue;
xfrm_state_hold(x);
return x;
}
@@ -698,6 +700,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
xfrm_addr_cmp(&x->props.saddr, saddr, family))
continue;
+ if ((mark & x->mark.m) != x->mark.v)
+ continue;
xfrm_state_hold(x);
return x;
}
@@ -790,6 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
if (x->props.family == family &&
x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_state_addr_check(x, daddr, saddr, family) &&
tmpl->mode == x->props.mode &&
@@ -805,6 +810,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) {
if (x->props.family == family &&
x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
!(x->props.flags & XFRM_STATE_WILDRECV) &&
xfrm_state_addr_check(x, daddr, saddr, family) &&
tmpl->mode == x->props.mode &&
@@ -888,6 +894,7 @@ xfrm_stateonly_find(struct net *net, u32 mark,
hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
+ (mark & x->mark.m) == x->mark.v &&
...From: Jamal Hadi Salim <hadi@cyberus.ca> pass mark to all SA lookups to prepare them for when we add code to have them search. Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca> --- include/net/xfrm.h | 23 +++++++++++++----- net/core/pktgen.c | 3 +- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 6 +++- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 6 +++- net/ipv6/xfrm6_input.c | 2 +- net/key/af_key.c | 14 ++++++----- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_state.c | 58 ++++++++++++++++++++++++++++------------------- net/xfrm/xfrm_user.c | 17 ++++++++----- 13 files changed, 84 insertions(+), 55 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 904527f..96b6953 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1325,7 +1325,7 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family); -extern struct xfrm_state * xfrm_stateonly_find(struct net *net, +extern struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, @@ -1334,8 +1334,14 @@ extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); -extern struct xfrm_state *xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); -extern struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark, + xfrm_address_t *daddr, __be32 ...
This doesn't look right. A mark value of 0 with a mask of ~0 won't --
Good point, thanks. I will make that change. Does the rest of the patches look reasonable? cheers, jamal --
I couldn't spot any further problems so far. One related feature which would be nice to have is the ability to use marks for xfrm tunnel routing. But I'm not sure we can do this in a backwards compatible way. --
I take it policy routing by mark is insufficient. If you have time, can you give me an example setup description of that and why it would be hard to be backward-compat? If there's anything i can do in these patches to help, I will be more than happy to. cheers, jamal --
A couple of years ago I used this in a multipath setup, which was using CONNMARK to persistently bind connections (tunnels in this case) to a route after the first selection. The problem with backwards compatibility is that people using marks for multipath routing are most likely not expecting the mark to suddenly take effect for IPsec tunnel routing. --
I see. Is there a historical reason why it hasn't been used this way? Reminds me of the reverse path patch I sent a while back that [...] The main reason it works ok for ipsec/policy-routing is because user space essentially pins down the kernel path. Could you not solve it via some user space daemon? First packet/event to user space, download policies, and wait until they expire or the route/tunnel goes down to react. One of the problems may be that the semantics of a general purpose tag like mark are left to either the programmer (as in connmark) or the admin (tc) - so a general purpose daemon would have to enforce some semantics to work ok. cheers, jamal --
xfrm ignores policy routing. You can't route IPSEC in Linux. This is actually a fairly annoying limitation. The workaround is to do like Microsoft: Encapsulate everything in l2tp or gre. /Benny --
With these patches, if you set a policy routing mark and have the proper mark setting in the skb or socket, then the proper route will be selected. If you have an SPD + SA added with the same mark, those will be used right after the route is selected. So essentially you have the same mark across all three (route, SPD and SA). Does that solve or alleviate the problem? cheers, jamal --
I don't actually use marks at all, I do policy routing based on source address. Currently rules are based on source interface, but all IPSEC traffic comes from the same interface, unlike some tunnel-based solutions. Right now packets going out through an IPSEC tunnel do not hit the routing table at all -- they just get shunted into the tunnel. Anything that gives me the chance to run the packets through normal routing before the tunnel grabs them works for me. From your description, I would add the IPSEC SPD + SA with a specific mark. Then I would set the mark in the rule table if I want the packets to go through the tunnel, or clear the mark to have them go through normal routing. Not perfect, because I would have to replicate parts of the routing table in the rule table, but it could be made to work. Perfect would be if I could set mark in the routing table instead of the rule table, but sometimes perfect is the enemy of good... /Benny --
Agreed this is a problem and not a nice one (the counter argument is [...] This is actually an interesting idea and is not far-fetched (and would certainly get rid of the replication problem). If I understood correctly, you would have: ip route add blah blah mark 0x10 and the routing core would use the mark (as it does, for example, with ifindex) to pick the route? I like the idea for the simple fact that it will immensely reduce configuration in some cases. cheers, jamal --
It would certainly be handy for me... /Benny --
I would certainly be interested in adding this feature for the reasons described above. An additional interesting connection would be to tie this feature to grouping of netdevices for the purpose of multipath routing. This would be the same as what we do currently with bindtodevice but on a group instead of a single netdevice. It would also require a general purpose netdev->mark to group multiple netdevices (for this case). The dev->mark could also be handy for other things (which have not been efficiently solved in the past); for example, I could add mark 0x10 to all ppp* devices and then do "ip link ls mark 0x10" and it would only fetch ppp* (or for shit-and-giggles as some New Brunswickians like to say, ip link mark 0x10 down). Patrick, thoughts? See anything breaking from either feature? cheers, jamal --
