On Tue, 12 Aug 2008, Sven Wegener wrote:
I wondered if this whole thing can ever be totally race condition free,
without changing how destinations are purged from the trash. Initial
version of the patch below. Basically it pushes the locking up into
ip_vs_lblc_schedule() and rearranges the code to be safe that we have a
valid destination.
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 7a6a319..67f7b04 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -123,31 +123,6 @@ static ctl_table vs_vars_table[] = {
static struct ctl_table_header * sysctl_header;
-/*
- * new/free a ip_vs_lblc_entry, which is a mapping of a destionation
- * IP address to a server.
- */
-static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest)
-{
- struct ip_vs_lblc_entry *en;
-
- en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
- if (en == NULL) {
- IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
- return NULL;
- }
-
- INIT_LIST_HEAD(&en->list);
- en->addr = daddr;
-
- atomic_inc(&dest->refcnt);
- en->dest = dest;
-
- return en;
-}
-
-
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
@@ -189,10 +164,8 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
*/
hash = ip_vs_lblc_hashkey(en->addr);
- write_lock(&tbl->lock);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
- write_unlock(&tbl->lock);
return 1;
}
@@ -209,23 +182,54 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
hash = ip_vs_lblc_hashkey(addr);
- read_lock(&tbl->lock);
-
list_for_each_entry(en, &tbl->bucket[hash], list) {
if (en->addr == addr) {
/* HIT */
- read_unlock(&tbl->lock);
return en;
}
}
- read_unlock(&tbl->lock);
-
return NULL;
}
/*
+ * new/free a ip_vs_lblc_entry, which is a mapping of a destionation
+ * IP address to a server.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_lblc_entry *en;
+
+ en = ip_vs_lblc_get(tbl, daddr);
+ if (!en) {
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en) {
+ IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&en->list);
+ en->addr = daddr;
+ en->lastuse = jiffies;
+
+ atomic_inc(&dest->refcnt);
+ en->dest = dest;
+
+ ip_vs_lblc_hash(tbl, en);
+ } else if (en->dest != dest) {
+ atomic_dec(&en->dest->refcnt);
+ atomic_inc(&dest->refcnt);
+ en->dest = dest;
+ }
+
+ return en;
+}
+
+
+/*
* Flush all the entries of the specified table.
*/
static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
@@ -484,7 +488,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct ip_vs_dest *dest;
+ struct ip_vs_dest *dest = NULL;
struct ip_vs_lblc_table *tbl;
struct ip_vs_lblc_entry *en;
struct iphdr *iph = ip_hdr(skb);
@@ -492,38 +496,43 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
tbl = (struct ip_vs_lblc_table *)svc->sched_data;
+ /* First look in our cache */
+ read_lock(&tbl->lock);
en = ip_vs_lblc_get(tbl, iph->daddr);
- if (en == NULL) {
- dest = __ip_vs_wlc_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- en = ip_vs_lblc_new(iph->daddr, dest);
- if (en == NULL) {
- return NULL;
- }
- ip_vs_lblc_hash(tbl, en);
- } else {
+ if (en) {
dest = en->dest;
- if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
- || atomic_read(&dest->weight) <= 0
- || is_overloaded(dest, svc)) {
- dest = __ip_vs_wlc_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- atomic_dec(&en->dest->refcnt);
- atomic_inc(&dest->refcnt);
- en->dest = dest;
- }
+ en->lastuse = jiffies;
+
+ /*
+ * If the destination is not available, i.e. it's in the trash,
+ * ignore it, as it may be removed from under our feet
+ */
+
+ if (!(dest->flags & IP_VS_DEST_F_AVAILABLE))
+ dest = NULL;
}
- en->lastuse = jiffies;
+ read_unlock(&tbl->lock);
+
+ /* If the destination has a weight and is not overloaded, use it */
+ if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+ goto out;
+
+ /* No cache entry or it is invalid, time to schedule */
+ dest = __ip_vs_wlc_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ return NULL;
+ }
+
+ /* If we fail to create a cache entry, we'll just use the valid dest */
+ write_lock(&tbl->lock);
+ ip_vs_lblc_new(tbl, iph->daddr, dest);
+ write_unlock(&tbl->lock);
+out:
IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
- NIPQUAD(en->addr),
+ NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
ntohs(dest->port));
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to
majordomo@vger.kernel.org
More majordomo info at
http://vger.kernel.org/majordomo-info.html