Re: [net-next PATCH v2] llc enhancements

Previous thread: [PATCH 7/8] sfc: QT2025C: Add error message for suspected bad SFP+ cables by Ben Hutchings on Wednesday, December 23, 2009 - 4:48 pm. (1 message)

Next thread: [PATCH 8/8] sfc: Disable TX descriptor prefetch watchdog by Ben Hutchings on Wednesday, December 23, 2009 - 4:49 pm. (1 message)
From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

This patch modifies the LLC code to scale the socket lookup code for a
large number of interfaces and large number of sockets bound to the
same SAP. We use it for STP traffic generation from a large number of
virtual STP ports, via virtual network interfaces.

In the process we converted the socket lookup code and sap list to use
RCU. It also contains some general cleanups (use dev_hard_header
instead of handcrafting the headers) and enhancements (LLC_OPT_PKTINFO).

This is the 2nd version. Changes from the previous version:

- added SO_BINDTODEVICE support for faster bind operations
- converted the socket lookup code and sap list to RCU
- optimized multicast delivery ala Eric
- remove some unused APIs (which should be private anyway)

Many thanks to Eric Dumazet for his continuous guidance and to Jarek
Poplawski for spotting a locking bug in the previous version.


--

From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 include/linux/llc.h    |    7 +++++++
 include/net/llc_conn.h |    1 +
 net/llc/af_llc.c       |   29 +++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/include/linux/llc.h b/include/linux/llc.h
index 7733585..ad7074b 100644
--- a/include/linux/llc.h
+++ b/include/linux/llc.h
@@ -36,6 +36,7 @@ enum llc_sockopts {
 	LLC_OPT_BUSY_TMR_EXP,	/* busy state expire time (secs). */
 	LLC_OPT_TX_WIN,		/* tx window size. */
 	LLC_OPT_RX_WIN,		/* rx window size. */
+	LLC_OPT_PKTINFO,	/* ancillary packet information. */
 	LLC_OPT_MAX
 };
 
@@ -70,6 +71,12 @@ enum llc_sockopts {
 #define LLC_SAP_RM	0xD4		/* Resource Management 		*/
 #define LLC_SAP_GLOBAL	0xFF		/* Global SAP. 			*/
 
+struct llc_pktinfo {
+	int lpi_ifindex;
+	unsigned char lpi_sap;
+	unsigned char lpi_mac[IFHWADDRLEN];
+};
+
 #ifdef __KERNEL__
 #define LLC_SAP_DYN_START	0xC0
 #define LLC_SAP_DYN_STOP	0xDE
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index e2374e3..fe982fd 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -76,6 +76,7 @@ struct llc_sock {
 	u32		    rx_pdu_hdr;	   /* used for saving header of last pdu
 					      received and caused sending FRMR.
 					      Used for resending FRMR */
+	u32		    cmsg_flags;
 };
 
 static inline struct llc_sock *llc_sk(const struct sock *sk)
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 3a66546..ac691fe 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -47,6 +47,10 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
 #define dprintk(args...)
 #endif
 
+/* Maybe we'll add some more in the future. */
+#define LLC_CMSG_PKTINFO	1
+
+
 /**
  *	llc_ui_next_link_no - return the next unused link number for a sap
  *	@sap: Address of sap to get link number from.
@@ -591,6 +595,20 @@ static int llc_wait_data(struct sock *sk, long timeo)
 	return rc;
 }
 
+static void llc_cmsg_rcv(struct ...
From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

Remove the list & lock from the exported API - it is not used anywhere
in the tree and most operations should be covered by llc_sap_open(),
llc_sap_close() and llc_sap_find().

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 include/net/llc.h  |    1 -
 net/llc/llc_core.c |    4 +---
 2 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/include/net/llc.h b/include/net/llc.h
index aee826d..7b9528b 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -92,7 +92,6 @@ struct hlist_nulls_head *llc_sk_laddr_hash(struct llc_sap *sap,
 #define LLC_DEST_CONN            2      /* Type 2 goes here */
 
 extern struct list_head llc_sap_list;
-extern spinlock_t llc_sap_list_lock;
 
 extern int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 		   struct packet_type *pt, struct net_device *orig_dev);
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 21fe87e..3f4e85b 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -23,7 +23,7 @@
 #include <net/llc.h>
 
 LIST_HEAD(llc_sap_list);
-DEFINE_SPINLOCK(llc_sap_list_lock);
+static DEFINE_SPINLOCK(llc_sap_list_lock);
 
 /**
  *	llc_sap_alloc - allocates and initializes sap.
@@ -160,8 +160,6 @@ static void __exit llc_exit(void)
 module_init(llc_init);
 module_exit(llc_exit);
 
-EXPORT_SYMBOL(llc_sap_list);
-EXPORT_SYMBOL(llc_sap_list_lock);
 EXPORT_SYMBOL(llc_sap_find);
 EXPORT_SYMBOL(llc_sap_open);
 EXPORT_SYMBOL(llc_sap_close);
-- 
1.5.6.5

--

From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

Optimize multicast delivery by doing the actual delivery without
holding the lock. Based on the same approach used in UDP code.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 net/llc/llc_sap.c |   36 ++++++++++++++++++++++++++++--------
 1 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index f154ab5..6a06b89 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -355,6 +355,24 @@ static inline bool llc_mcast_match(const struct llc_sap *sap,
 	  llc->dev == skb->dev;
 }
 
+static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb,
+			 struct sock **stack, int count)
+{
+	struct sk_buff *skb1;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		skb1 = skb_clone(skb, GFP_ATOMIC);
+		if (!skb1) {
+			sock_put(stack[i]);
+			continue;
+		}
+
+		llc_sap_rcv(sap, skb1, stack[i]);
+		sock_put(stack[i]);
+	}
+}
+
 /**
  * 	llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets.
  *	@sap: SAP
@@ -367,25 +385,27 @@ static void llc_sap_mcast(struct llc_sap *sap,
 			  const struct llc_addr *laddr,
 			  struct sk_buff *skb)
 {
-	struct sock *sk;
+	int i = 0, count = 256 / sizeof(struct sock *);
+	struct sock *sk, *stack[count];
 	struct hlist_nulls_node *node;
 
 	spin_lock_bh(&sap->sk_lock);
 	sk_nulls_for_each_rcu(sk, node, &sap->sk_list) {
-		struct sk_buff *skb1;
 
 		if (!llc_mcast_match(sap, laddr, skb, sk))
 			continue;
 
-		skb1 = skb_clone(skb, GFP_ATOMIC);
-		if (!skb1)
-			break;
-
 		sock_hold(sk);
-		llc_sap_rcv(sap, skb1, sk);
-		sock_put(sk);
+		if (i < count)
+			stack[i++] = sk;
+		else {
+			llc_do_mcast(sap, skb, stack, i);
+			i = 0;
+		}
 	}
 	spin_unlock_bh(&sap->sk_lock);
+
+	llc_do_mcast(sap, skb, stack, i);
 }
 
 
-- 
1.5.6.5

--

From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

Using dev_hard_header allows us to use LLC with VLANs and potentially
other Ethernet/TokernRing specific encapsulations. It also removes code
duplication between LLC and Ethernet/TokenRing core code.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 drivers/net/myri_sbus.c |    6 +++---
 net/8021q/vlan_dev.c    |    7 +++----
 net/ethernet/eth.c      |    6 +++---
 net/llc/llc_output.c    |   45 ++++++++-------------------------------------
 4 files changed, 17 insertions(+), 47 deletions(-)

diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index b3513ad..8b43130 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -716,10 +716,10 @@ static int myri_header(struct sk_buff *skb, struct net_device *dev,
 	pad[0] = MYRI_PAD_LEN;
 	pad[1] = 0xab;
 
-	/* Set the protocol type. For a packet of type ETH_P_802_3 we put the length
-	 * in here instead. It is up to the 802.2 layer to carry protocol information.
+	/* Set the protocol type. For a packet of type ETH_P_802_3/2 we put the
+	 * length in here instead.
 	 */
-	if (type != ETH_P_802_3)
+	if (type != ETH_P_802_3 && type != ETH_P_802_2)
 		eth->h_proto = htons(type);
 	else
 		eth->h_proto = htons(len);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b788978..77a49ff 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -263,11 +263,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
 		/*
-		 *  Set the protocol type. For a packet of type ETH_P_802_3 we
-		 *  put the length in here instead. It is up to the 802.2
-		 *  layer to carry protocol information.
+		 *  Set the protocol type. For a packet of type ETH_P_802_3/2 we
+		 *  put the length in here instead.
 		 */
-		if (type != ETH_P_802_3)
+		if (type != ETH_P_802_3 && type != ETH_P_802_2)
 			vhdr->h_vlan_encapsulated_proto = htons(type);
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
diff --git ...
From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 include/net/llc.h  |    2 +-
 net/llc/llc_core.c |   43 +++++++++++--------------------------------
 net/llc/llc_proc.c |   11 ++++-------
 3 files changed, 16 insertions(+), 40 deletions(-)

diff --git a/include/net/llc.h b/include/net/llc.h
index b020185..aee826d 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -92,7 +92,7 @@ struct hlist_nulls_head *llc_sk_laddr_hash(struct llc_sap *sap,
 #define LLC_DEST_CONN            2      /* Type 2 goes here */
 
 extern struct list_head llc_sap_list;
-extern rwlock_t llc_sap_list_lock;
+extern spinlock_t llc_sap_list_lock;
 
 extern int llc_rcv(struct sk_buff *skb, struct net_device *dev,
 		   struct packet_type *pt, struct net_device *orig_dev);
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 0a5900d..21fe87e 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -23,7 +23,7 @@
 #include <net/llc.h>
 
 LIST_HEAD(llc_sap_list);
-DEFINE_RWLOCK(llc_sap_list_lock);
+DEFINE_SPINLOCK(llc_sap_list_lock);
 
 /**
  *	llc_sap_alloc - allocates and initializes sap.
@@ -46,30 +46,6 @@ static struct llc_sap *llc_sap_alloc(void)
 	return sap;
 }
 
-/**
- *	llc_add_sap - add sap to station list
- *	@sap: Address of the sap
- *
- *	Adds a sap to the LLC's station sap list.
- */
-static void llc_add_sap(struct llc_sap *sap)
-{
-	list_add_tail(&sap->node, &llc_sap_list);
-}
-
-/**
- *	llc_del_sap - del sap from station list
- *	@sap: Address of the sap
- *
- *	Removes a sap to the LLC's station sap list.
- */
-static void llc_del_sap(struct llc_sap *sap)
-{
-	write_lock_bh(&llc_sap_list_lock);
-	list_del(&sap->node);
-	write_unlock_bh(&llc_sap_list_lock);
-}
-
 static struct llc_sap *__llc_sap_find(unsigned char sap_value)
 {
 	struct llc_sap* sap;
@@ -93,13 +69,13 @@ out:
  */
 struct llc_sap *llc_sap_find(unsigned char sap_value)
 {
-	struct llc_sap* sap;
+	struct llc_sap *sap;
 ...
From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

For the cases where a lot of interfaces are used in conjunction with a
lot of LLC sockets bound to the same SAP, the iteration of the socket
list becomes prohibitively expensive.

Replacing the list with a a local address based hash significantly
improves the bind and listener lookup operations as well as the
datagram delivery.

Connected sockets delivery is also improved, but this patch does not
address the case where we have lots of sockets with the same local
address connected to different remote addresses.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 include/net/llc.h  |   20 ++++++++++++++-
 net/llc/llc_conn.c |   68 +++++++++++++++++++++++++++++++++++++--------------
 net/llc/llc_core.c |    5 ++-
 net/llc/llc_proc.c |   44 +++++++++++++++++++++++----------
 net/llc/llc_sap.c  |   11 +++++++-
 5 files changed, 111 insertions(+), 37 deletions(-)

diff --git a/include/net/llc.h b/include/net/llc.h
index dbef591..b020185 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -17,6 +17,8 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rculist_nulls.h>
+#include <linux/hash.h>
+#include <linux/jhash.h>
 
 #include <asm/atomic.h>
 
@@ -35,6 +37,9 @@ struct llc_addr {
 #define LLC_SK_DEV_HASH_BITS 6
 #define LLC_SK_DEV_HASH_ENTRIES (1<<LLC_SK_DEV_HASH_BITS)
 
+#define LLC_SK_LADDR_HASH_BITS 6
+#define LLC_SK_LADDR_HASH_ENTRIES (1<<LLC_SK_LADDR_HASH_BITS)
+
 /**
  * struct llc_sap - Defines the SAP component
  *
@@ -58,7 +63,7 @@ struct llc_sap {
 	struct llc_addr	 laddr;
 	struct list_head node;
 	spinlock_t sk_lock;
-	struct hlist_nulls_head sk_list;
+	struct hlist_nulls_head sk_laddr_hash[LLC_SK_LADDR_HASH_ENTRIES];
 	struct hlist_head sk_dev_hash[LLC_SK_DEV_HASH_ENTRIES];
 };
 
@@ -68,6 +73,19 @@ struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex)
 	return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES];
 }
 
+static inline
+u32 llc_sk_laddr_hashfn(struct llc_sap *sap, const struct ...
From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

For the reclamation phase we use the SLAB_DESTROY_BY_RCU mechanism,
which require some extra checks in the lookup code:

a) If the current socket was released, reallocated & inserted in
another list it will short circuit the iteration for the current list,
thus we need to restart the lookup.

b) If the current socket was released, reallocated & inserted in the
same list we just need to recheck it matches the look-up criteria and
if not we can skip to the next element.

In this case there is no need to restart the lookup, since sockets are
inserted at the start of the list and the worst that will happen is
that we will iterate throught some of the list elements more then
once.

Note that the /proc and multicast delivery was not yet converted to
RCU, it still uses spinlocks for protection.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 include/net/llc.h  |    7 ++--
 net/llc/af_llc.c   |    1 +
 net/llc/llc_conn.c |   93 ++++++++++++++++++++++++++++++++++-----------------
 net/llc/llc_core.c |    5 ++-
 net/llc/llc_proc.c |   22 ++++++------
 net/llc/llc_sap.c  |   66 ++++++++++++++++++++++++-------------
 6 files changed, 123 insertions(+), 71 deletions(-)

diff --git a/include/net/llc.h b/include/net/llc.h
index 7940da1..1559cf1 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -16,6 +16,7 @@
 #include <linux/if_ether.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/rculist_nulls.h>
 
 #include <asm/atomic.h>
 
@@ -53,10 +54,8 @@ struct llc_sap {
 				     struct net_device *orig_dev);
 	struct llc_addr	 laddr;
 	struct list_head node;
-	struct {
-		rwlock_t	  lock;
-		struct hlist_head list;
-	} sk_list;
+	spinlock_t sk_lock;
+	struct hlist_nulls_head sk_list;
 };
 
 #define LLC_DEST_INVALID         0      /* Invalid LLC PDU type */
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c4d1a1d..f49f3dd 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -140,6 +140,7 @@ static struct proto llc_proto ...
From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

Using bind(MAC address) with LLC sockets has O(n) complexity, where n
is the number of interfaces. To overcome this, we add support for
SO_BINDTODEVICE which drops the complexity to O(1).

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 net/llc/af_llc.c |   29 +++++++++++++++++++++++++++--
 1 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index ac691fe..c4d1a1d 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -259,7 +259,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
 	if (!sock_flag(sk, SOCK_ZAPPED))
 		goto out;
 	rc = -ENODEV;
-	llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
+	if (sk->sk_bound_dev_if) {
+		llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+		if (llc->dev && addr->sllc_arphrd != llc->dev->type) {
+			dev_put(llc->dev);
+			llc->dev = NULL;
+		}
+	} else
+		llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
 	if (!llc->dev)
 		goto out;
 	rc = -EUSERS;
@@ -310,7 +317,25 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 		goto out;
 	rc = -ENODEV;
 	rtnl_lock();
-	llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac);
+	if (sk->sk_bound_dev_if) {
+		llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+		if (llc->dev) {
+			if (!addr->sllc_arphrd)
+				addr->sllc_arphrd = llc->dev->type;
+			if (llc_mac_null(addr->sllc_mac))
+				memcpy(addr->sllc_mac, llc->dev->dev_addr,
+				       IFHWADDRLEN);
+			if (addr->sllc_arphrd != llc->dev->type ||
+			    !llc_mac_match(addr->sllc_mac,
+					   llc->dev->dev_addr)) {
+				rc = -EINVAL;
+				dev_put(llc->dev);
+				llc->dev = NULL;
+			}
+		}
+	} else
+		llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd,
+					   addr->sllc_mac);
 	rtnl_unlock();
 	if (!llc->dev)
 		goto out;
-- 
1.5.6.5

--

From: Octavian Purdila
Date: Wednesday, December 23, 2009 - 4:45 pm

This patch adds a per SAP device based hash table to solve the
multicast delivery scalability issue when we have large number of
interfaces and a large number of sockets bound to the same SAP.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 include/net/llc.h      |   11 +++++++++++
 include/net/llc_conn.h |    1 +
 net/llc/llc_conn.c     |   10 +++++++++-
 net/llc/llc_sap.c      |    8 ++++++--
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/net/llc.h b/include/net/llc.h
index 1559cf1..dbef591 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -32,6 +32,9 @@ struct llc_addr {
 #define LLC_SAP_STATE_INACTIVE	1
 #define LLC_SAP_STATE_ACTIVE	2
 
+#define LLC_SK_DEV_HASH_BITS 6
+#define LLC_SK_DEV_HASH_ENTRIES (1<<LLC_SK_DEV_HASH_BITS)
+
 /**
  * struct llc_sap - Defines the SAP component
  *
@@ -56,8 +59,16 @@ struct llc_sap {
 	struct list_head node;
 	spinlock_t sk_lock;
 	struct hlist_nulls_head sk_list;
+	struct hlist_head sk_dev_hash[LLC_SK_DEV_HASH_ENTRIES];
 };
 
+static inline
+struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex)
+{
+	return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES];
+}
+
+
 #define LLC_DEST_INVALID         0      /* Invalid LLC PDU type */
 #define LLC_DEST_SAP             1      /* Type 1 goes here */
 #define LLC_DEST_CONN            2      /* Type 2 goes here */
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index fe982fd..2f97d8d 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -77,6 +77,7 @@ struct llc_sock {
 					      received and caused sending FRMR.
 					      Used for resending FRMR */
 	u32		    cmsg_flags;
+	struct hlist_node   dev_hash_node;
 };
 
 static inline struct llc_sock *llc_sk(const struct sock *sk)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 6e10c0d..7d776c0 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -682,10 +682,15 @@ static int llc_find_offset(int state, int ...
From: Eric Dumazet
Date: Thursday, December 24, 2009 - 1:35 am

Pretty impressive work Octavian !

My only concerns (before drinking my coffee, I might be wrong...) are :

1) Patch 7/9 : __llc_lookup_established()
   Checking slot number is not enough I am afraid. A socket can be freed,
   re-allocated, inserted in another sap hash list on _same_ slot number.
   We dont have this problem with UDP/TCP since we have only
   one hash table on machine, but with llc, we might have many sap hash tables.

   So before if (llc_estab_match(sap, daddr, laddr, rc)) test, you probably
   need to check if we found a socket hashed on a different hash table and
   restart the lookup.

2) the WARN_ON() removal in patch 7/9 :

 void llc_sap_close(struct llc_sap *sap)
 {
-	WARN_ON(!hlist_nulls_empty(&sap->sk_list));
 	llc_del_sap(sap);
 	kfree(sap);
 }

I believe we should keep the sanity test some time, converted to sk_laddr_hash[] variant.

Thanks
--

From: Octavian Purdila
Date: Thursday, December 24, 2009 - 5:17 am

Woudn't the sap check after the the atomic_inc check take care of this?

        rcu_read_lock();
again:
        sk_nulls_for_each_rcu(rc, node, laddr_hb) {
                if (llc_estab_match(sap, daddr, laddr, rc)) {
                        /* Extra checks required by SLAB_DESTROY_BY_RCU */
                        if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt) ||
                                     llc_sk(rc)->sap != sap))
                                goto again;
                        if (unlikely(!llc_estab_match(sap, daddr, laddr, rc))) {
                                sock_put(rc);
                                continue;
                        }
                        goto found;
                }
        }


I think adding counter would be necessary for this - to avoid iterating over all the hash buckets. Does that sound ok?

Thanks !
--

From: David Miller
Date: Friday, December 25, 2009 - 6:11 pm

From: Octavian Purdila <opurdila@ixiacom.com>

Octavian, please resubmit your patch series after you've
sorted out these issues/leaks/etc.

Thanks a lot!
--

Previous thread: [PATCH 7/8] sfc: QT2025C: Add error message for suspected bad SFP+ cables by Ben Hutchings on Wednesday, December 23, 2009 - 4:48 pm. (1 message)

Next thread: [PATCH 8/8] sfc: Disable TX descriptor prefetch watchdog by Ben Hutchings on Wednesday, December 23, 2009 - 4:49 pm. (1 message)