This patch series adds load-balancing of UDP SIP based on Call-ID to IPVS as well as a framework for extending IPVS to handle alternate persistence requirements. REVISIONS This is "patch v1" of this series, which addresses a few minor problems, as annotated on a per-patch basis, since the initial "rfc" posting. Internally there were 4 rfc versions, 0.1, 0.2, 0.3 and 0.4; some of the notes for some of the patches reflect those versions. OVERVIEW The approach that I have taken is what I call persistence engines. The basic idea is that you can provide a module to LVS that alters the way that it handles connection templates, which are at the core of persistence. In particular, an additional key can be added, and any of the normal IP address, port and protocol information can either be used or ignored. In the case of the SIP persistence engine, the only persistence engine, all the keys used by the default persistence behaviour are used and the callid is added as an extra key. I originally intended to ignore the cip, but this can optionally be done by setting the persistence mask (-M) to 0.0.0.0 while allowing the flexibility of other mask values. It is envisaged that the SIP persistence engine will be used in conjunction with one-packet scheduling. I'm interested to hear if that doesn't fit your needs. CONFIGURATION A persistence engine is associated with a virtual service (as are schedulers). I have added the --pe option to the ipvsadm -A and -E commands to allow the persistence engine of a virtual service to be added, changed, or deleted. e.g. ipvsadm -A -u 10.4.3.192:5060 -p 60 -M 0.0.0.0 -o --pe sip There are no other configuration parameters at this time. RUNNING When a connection template is created, if its virtual service has a persistence engine, then the persistence engine can add an extra key to the connection template. For the SIP module this is the callid. More generically, it is known as "pe data". And both the name of the persistence ...
This simplifies caller logic slightly.
Signed-off-by: Simon Horman <horms@verge.net.au>
Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_ctl.c
===================================================================
--- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_ctl.c 2010-07-22 21:52:23.000000000 +0900
+++ nf-next-2.6/net/netfilter/ipvs/ip_vs_ctl.c 2010-07-22 21:54:38.000000000 +0900
@@ -1167,7 +1167,7 @@ ip_vs_add_service(struct ip_vs_service_u
if (sched == NULL) {
pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
ret = -ENOENT;
- goto out_mod_dec;
+ goto out_err;
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1227,7 +1227,7 @@ ip_vs_add_service(struct ip_vs_service_u
*svc_p = svc;
return 0;
- out_err:
+ out_err:
if (svc != NULL) {
if (svc->scheduler)
ip_vs_unbind_scheduler(svc);
@@ -1240,7 +1240,6 @@ ip_vs_add_service(struct ip_vs_service_u
}
ip_vs_scheduler_put(sched);
- out_mod_dec:
/* decrease the module use count */
ip_vs_use_count_dec();
@@ -1323,10 +1322,7 @@ ip_vs_edit_service(struct ip_vs_service
#ifdef CONFIG_IP_VS_IPV6
out:
#endif
-
- if (old_sched)
- ip_vs_scheduler_put(old_sched);
-
+ ip_vs_scheduler_put(old_sched);
return ret;
}
@@ -1350,8 +1346,7 @@ static void __ip_vs_del_service(struct i
/* Unbind scheduler */
old_sched = svc->scheduler;
ip_vs_unbind_scheduler(svc);
- if (old_sched)
- ip_vs_scheduler_put(old_sched);
+ ip_vs_scheduler_put(old_sched);
/* Unbind app inc */
if (svc->inc) {
Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c
===================================================================
--- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_sched.c 2010-07-22 21:52:23.000000000 +0900
+++ nf-next-2.6/net/netfilter/ipvs/ip_vs_sched.c 2010-07-22 21:55:42.000000000 +0900
@@ -159,7 +159,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
{
- if (scheduler->module)
+ if (scheduler && scheduler->module)
...Signed-off-by: Simon Horman <horms@verge.net.au>
---
The motivation for this is to allow persistence engine modules to
fill in the parameters.
v0.3: Add missing changes to ip_vs_ftp.c
v0.1: Initial release
Index: nf-next-2.6/include/net/ip_vs.h
===================================================================
--- nf-next-2.6.orig/include/net/ip_vs.h 2010-07-28 22:02:23.000000000 +0900
+++ nf-next-2.6/include/net/ip_vs.h 2010-07-28 22:02:26.000000000 +0900
@@ -355,6 +355,15 @@ struct ip_vs_protocol {
extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+struct ip_vs_conn_param {
+ const union nf_inet_addr *caddr;
+ const union nf_inet_addr *vaddr;
+ __be16 cport;
+ __be16 vport;
+ __u16 protocol;
+ u16 af;
+};
+
/*
* IP_VS structure allocated for each dynamically scheduled connection
*/
@@ -624,13 +633,23 @@ enum {
IP_VS_DIR_LAST,
};
-extern struct ip_vs_conn *ip_vs_conn_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
-
-extern struct ip_vs_conn *ip_vs_ct_in_get
-(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
- const union nf_inet_addr *d_addr, __be16 d_port);
+static inline void ip_vs_conn_fill_param(int af, int protocol,
+ const union nf_inet_addr *caddr,
+ __be16 cport,
+ const union nf_inet_addr *vaddr,
+ __be16 vport,
+ struct ip_vs_conn_param *p)
+{
+ p->af = af;
+ p->protocol = protocol;
+ p->caddr = caddr;
+ p->cport = cport;
+ p->vaddr = vaddr;
+ p->vport = vport;
+}
+
+struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
+struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
@@ -638,9 +657,7 @@ struct ip_vs_conn * ip_vs_conn_in_get_pr
unsigned int proto_off,
int inverse);
-extern ...This shouldn't break compatibility with userspace as the new data
is at the end of the line.
I have confirmed that this doesn't break ipvsadm, the main (only?)
user-space user of this data.
Signed-off-by: Simon Horman <horms@verge.net.au>
* Jan Engelhardt suggested using netlink to do this, but it seems like
overkill to me. I'm willing to be convinced otherwise.
Index: nf-next-2.6/include/net/ip_vs.h
===================================================================
--- nf-next-2.6.orig/include/net/ip_vs.h 2010-07-26 07:51:10.000000000 +0900
+++ nf-next-2.6/include/net/ip_vs.h 2010-07-26 16:39:42.000000000 +0900
@@ -569,6 +569,7 @@ struct ip_vs_pe {
bool (*ct_match)(const struct ip_vs_conn_param *p,
struct ip_vs_conn *ct);
u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval);
+ int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf);
};
/*
Index: nf-next-2.6/net/netfilter/ipvs/ip_vs_conn.c
===================================================================
--- nf-next-2.6.orig/net/netfilter/ipvs/ip_vs_conn.c 2010-07-26 07:51:10.000000000 +0900
+++ nf-next-2.6/net/netfilter/ipvs/ip_vs_conn.c 2010-07-26 16:39:38.000000000 +0900
@@ -921,30 +921,44 @@ static int ip_vs_conn_seq_show(struct se
if (v == SEQ_START_TOKEN)
seq_puts(seq,
- "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires\n");
+ "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
else {
const struct ip_vs_conn *cp = v;
+ char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
+ size_t len = 0;
+
+ if (cp->dest->svc->pe && cp->dest->svc->pe->show_pe_data) {
+ pe_data[0] = ' ';
+ len = strlen(cp->dest->svc->pe->name);
+ memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+ pe_data[len + 1] = ' ';
+ len += 2;
+ len += cp->dest->svc->pe->show_pe_data(cp,
+ pe_data + len);
+ }
+ pe_data[len] = '\0';
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == ...Just to clarify, do you want me to merge this version? --
Assuming that it applies cleanly, yes. Otherwise I can rebase and re-post. --
There are some smaller conflicts, please rebase to the current nf-next tree. Thanks. --
Unfortunately a few bugs have shown up. I'll get them ironed out before reposting. --
