The Server Side Transport switch implements a pluggable transport provider layer for RPC servers. This switch allows network transport drivers to be dynamically loaded and registered, thereby extending the set of transports that can carry RPC. This patchset includes a transport provider driver for TCP and UDP sockets. This version of the patchset includes the following updates to the original patch series: - Changed transport hdr len fields and computations to use size_t - Matched local variables to sign of socket specific write-space functions in xpo_wspace transport functions. - Modified tcp_has_wspace function to include min_wspace consideration - Changed address length functions to size_t - Changed port helper function return types to unsigned short - Changed error return from svc_addr_len to EAFNOSUPPORT from ENOTSUPP - When modifying function prototypes or adding new functions adhered to convention of keeping signature on a single line. - Removed debug printk from svc_send - Created separate patch for change that moves the call to svc_xprt_received to common code for the accept path - Combined the portlist update patches into a single patch and updated the error codes to give more helpful perror strings when writing garbage to the portlist file. This patchset is based on 2.6.24-rc5 and has been tested on V2, V3 and V4 over the UDP, TCP and RDMA transports. -- Signed-off-by: Tom Tucker <tom@opengridcomputing.com> -
The transport class (svc_xprt_class) represents a type of transport, e.g.
udp, tcp, rdma. A transport class has a unique name and a set of transport
operations kept in the svc_xprt_ops structure.
A transport class can be dynamically registered and unregisterd. The
svc_xprt_class represents the module that implements the transport
type and keeps reference counts on the module to avoid unloading while
there are active users.
The endpoint (svc_xprt) is a generic, transport independent endpoint that can
be used to send and receive data for an RPC service. It inherits it's
operations from the transport class.
A transport driver module registers and unregisters itself with svc sunrpc
by calling svc_reg_xprt_class, and svc_unreg_xprt_class respectively.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/debug.h | 1
include/linux/sunrpc/svc_xprt.h | 31 +++++++++++++
net/sunrpc/Makefile | 3 +
net/sunrpc/svc_xprt.c | 95 +++++++++++++++++++++++++++++++++++++++
4 files changed, 129 insertions(+), 1 deletions(-)
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 3912cf1..092fcfa 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -21,6 +21,7 @@
#define RPCDBG_SCHED 0x0040
#define RPCDBG_TRANS 0x0080
#define RPCDBG_SVCSOCK 0x0100
+#define RPCDBG_SVCXPRT 0x0100
#define RPCDBG_SVCDSP 0x0200
#define RPCDBG_MISC 0x0400
#define RPCDBG_CACHE 0x0800
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
new file mode 100644
index 0000000..a8b1da8
--- /dev/null
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -0,0 +1,31 @@
+/*
+ * linux/include/linux/sunrpc/svc_xprt.h
+ *
+ * RPC server transport I/O
+ */
+
+#ifndef SUNRPC_SVC_XPRT_H
+#define SUNRPC_SVC_XPRT_H
+
+#include <linux/sunrpc/svc.h>
+
+struct svc_xprt_ops {
+};
+
+struct svc_xprt_class {
+ const char *xcl_name;
+ struct ...Sorry for joining in a little late.... None of the callers appear to check the return value, so this should Unless we care how xcl_list is set in the error case, this is Is this even worth checking? Accidentally registering the identical struct svc_xprt_class * seems an unlikely mistake for a transport-class coder to make, given that they only need call this function once per transport, in the module initialization. Maybe make this a BUG()? Or Again, nobody checks this error return, and it seems like an unlikely programmer error anyway. If we really need the check I'd be inclined just to ditche the for loop and BUG_ON(list_empty(&cl->xcl_list)), which Seems fine otherwise. --b. -
Make TCP and UDP svc_sock transports, and register them
with the svc transport core.
A transport type (svc_sock) has an svc_xprt as its first member,
and calls svc_xprt_init to initialize this field.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/debug.h | 1 -
include/linux/sunrpc/svcsock.h | 4 ++++
net/sunrpc/sunrpc_syms.c | 4 +++-
net/sunrpc/svcsock.c | 33 ++++++++++++++++++++++++++++++++-
4 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 092fcfa..10709cb 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -20,7 +20,6 @@
#define RPCDBG_BIND 0x0020
#define RPCDBG_SCHED 0x0040
#define RPCDBG_TRANS 0x0080
-#define RPCDBG_SVCSOCK 0x0100
#define RPCDBG_SVCXPRT 0x0100
#define RPCDBG_SVCDSP 0x0200
#define RPCDBG_MISC 0x0400
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a53e0fa..1878cbe 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -10,11 +10,13 @@
#define SUNRPC_SVCSOCK_H
#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
/*
* RPC server socket.
*/
struct svc_sock {
+ struct svc_xprt sk_xprt;
struct list_head sk_ready; /* list of ready sockets */
struct list_head sk_list; /* list of all sockets */
struct socket * sk_sock; /* berkeley socket layer */
@@ -78,6 +80,8 @@ int svc_addsock(struct svc_serv *serv,
int fd,
char *name_return,
int *proto);
+void svc_init_xprt_sock(void);
+void svc_cleanup_xprt_sock(void);
/*
* svc_makesock socket characteristics
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 33d89e8..79ea05f 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -151,7 +151,8 @@ init_sunrpc(void)
#endif
cache_register(&ip_map_cache);
...The rqstp structure contains a pointer to the transport for the
RPC request. This functionaly trivial patch adds an unamed union
with pointers to both svc_sock and svc_xprt. Ultimately the
union will be removed and only the rq_xprt field will remain. This
allows incrementally extracting transport independent interfaces without
one gigundo patch.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc.h | 5 ++++-
1 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 8531a70..37f7448 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -204,7 +204,10 @@ union svc_addr_u {
struct svc_rqst {
struct list_head rq_list; /* idle list */
struct list_head rq_all; /* all threads list */
- struct svc_sock * rq_sock; /* socket */
+ union {
+ struct svc_xprt * rq_xprt; /* transport ptr */
+ struct svc_sock * rq_sock; /* socket ptr */
+ };
struct sockaddr_storage rq_addr; /* peer address */
size_t rq_addrlen;
-
The svc_max_payload function currently looks at the socket type
to determine the max payload. Add a max payload value to svc_xprt_class
so it can be returned directly.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 1 +
net/sunrpc/svc.c | 4 +---
net/sunrpc/svcsock.c | 2 ++
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a8b1da8..b4ce054 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -17,6 +17,7 @@ struct svc_xprt_class {
struct module *xcl_owner;
struct svc_xprt_ops *xcl_ops;
struct list_head xcl_list;
+ u32 xcl_max_payload;
};
struct svc_xprt {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a4a6bf7..ea3fa86 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1054,10 +1054,8 @@ err_bad:
*/
u32 svc_max_payload(const struct svc_rqst *rqstp)
{
- int max = RPCSVC_MAXPAYLOAD_TCP;
+ u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
- if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM)
- max = RPCSVC_MAXPAYLOAD_UDP;
if (rqstp->rq_server->sv_max_payload < max)
max = rqstp->rq_server->sv_max_payload;
return max;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 4755467..ca9b8d8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -906,6 +906,7 @@ static struct svc_xprt_ops svc_udp_ops = {
static struct svc_xprt_class svc_udp_class = {
.xcl_name = "udp",
.xcl_ops = &svc_udp_ops,
+ .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP,
};
static void
@@ -1359,6 +1360,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
static struct svc_xprt_class svc_tcp_class = {
.xcl_name = "tcp",
.xcl_ops = &svc_tcp_ops,
+ .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
};
void svc_init_xprt_sock(void)
-
The sk_sendto and sk_recvfrom are function pointers that allow svc_sock
to be used for both UDP and TCP. Move these function pointers to the
svc_xprt_ops structure.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 2 ++
include/linux/sunrpc/svcsock.h | 3 ---
net/sunrpc/svcsock.c | 12 ++++++------
3 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b4ce054..81daa39 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,8 @@
#include <linux/sunrpc/svc.h>
struct svc_xprt_ops {
+ int (*xpo_recvfrom)(struct svc_rqst *);
+ int (*xpo_sendto)(struct svc_rqst *);
};
struct svc_xprt_class {
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 1878cbe..08e78d0 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -45,9 +45,6 @@ struct svc_sock {
* be revisted */
struct mutex sk_mutex; /* to serialize sending data */
- int (*sk_recvfrom)(struct svc_rqst *rqstp);
- int (*sk_sendto)(struct svc_rqst *rqstp);
-
/* We keep the old state_change and data_ready CB's here */
void (*sk_ostate)(struct sock *);
void (*sk_odata)(struct sock *, int bytes);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ca9b8d8..9c06b15 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -901,6 +901,8 @@ svc_udp_sendto(struct svc_rqst *rqstp)
}
static struct svc_xprt_ops svc_udp_ops = {
+ .xpo_recvfrom = svc_udp_recvfrom,
+ .xpo_sendto = svc_udp_sendto,
};
static struct svc_xprt_class svc_udp_class = {
@@ -918,8 +920,6 @@ svc_udp_init(struct svc_sock *svsk)
svc_xprt_init(&svc_udp_class, &svsk->sk_xprt);
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
- svsk->sk_recvfrom = svc_udp_recvfrom;
- svsk->sk_sendto = ...The svc_sock_release function releases pages allocated to a thread. For
UDP, this also returns the receive skb to the stack. For RDMA it will
post a receive WR and bump the client credit count.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc.h | 2 +-
include/linux/sunrpc/svc_xprt.h | 1 +
net/sunrpc/svcsock.c | 17 +++++++++--------
3 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 37f7448..cfb2652 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -217,7 +217,7 @@ struct svc_rqst {
struct auth_ops * rq_authop; /* authentication flavour */
u32 rq_flavor; /* pseudoflavor */
struct svc_cred rq_cred; /* auth info */
- struct sk_buff * rq_skbuff; /* fast recv inet buffer */
+ void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
struct xdr_buf rq_arg;
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 81daa39..e3bd7b1 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -12,6 +12,7 @@
struct svc_xprt_ops {
int (*xpo_recvfrom)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
+ void (*xpo_release_rqst)(struct svc_rqst *);
};
struct svc_xprt_class {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9c06b15..b24c084 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -185,14 +185,13 @@ svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
/*
* Release an skbuff after use
*/
-static inline void
-svc_release_skb(struct svc_rqst *rqstp)
+static void svc_release_skb(struct svc_rqst *rqstp)
{
- struct sk_buff *skb = rqstp->rq_skbuff;
+ struct sk_buff *skb = rqstp->rq_xprt_ctxt;
struct svc_deferred_req *dr = rqstp->rq_deferred;
if (skb) {
- rqstp->rq_skbuff = ...Add transport specific xpo_detach and xpo_free functions. The xpo_detach
function causes the transport to stop delivering data-ready events
and enqueing the transport for I/O.
The xpo_free function frees all resources associated with the particular
transport instance.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 2 +
net/sunrpc/svcsock.c | 56 ++++++++++++++++++++++++++++++---------
2 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index e3bd7b1..5d7b2a6 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -13,6 +13,8 @@ struct svc_xprt_ops {
int (*xpo_recvfrom)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
void (*xpo_release_rqst)(struct svc_rqst *);
+ void (*xpo_detach)(struct svc_xprt *);
+ void (*xpo_free)(struct svc_xprt *);
};
struct svc_xprt_class {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b24c084..478fa33 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -85,6 +85,8 @@ static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
static void svc_close_socket(struct svc_sock *svsk);
+static void svc_sock_detach(struct svc_xprt *);
+static void svc_sock_free(struct svc_xprt *);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -376,16 +378,8 @@ static inline void
svc_sock_put(struct svc_sock *svsk)
{
if (atomic_dec_and_test(&svsk->sk_inuse)) {
- BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
-
- dprintk("svc: releasing dead socket\n");
- if (svsk->sk_sock->file)
- sockfd_put(svsk->sk_sock);
- else
- sock_release(svsk->sk_sock);
- if (svsk->sk_info_authunix != ...Some transports add fields to the RPC header for replies, e.g. the TCP
record length. This function is called when preparing the reply header
to allow each transport to add whatever fields it requires.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 1 +
net/sunrpc/svc.c | 6 +++---
net/sunrpc/svcsock.c | 17 +++++++++++++++++
3 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 5d7b2a6..8501115 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -11,6 +11,7 @@
struct svc_xprt_ops {
int (*xpo_recvfrom)(struct svc_rqst *);
+ void (*xpo_prep_reply_hdr)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index ea3fa86..3cc945d 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -839,9 +839,9 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_len = 0;
/* Will be turned off only in gss privacy case: */
rqstp->rq_splice_ok = 1;
- /* tcp needs a space for the record length... */
- if (rqstp->rq_prot == IPPROTO_TCP)
- svc_putnl(resv, 0);
+
+ /* Setup reply header */
+ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
rqstp->rq_xid = svc_getu32(argv);
svc_putu32(resv, rqstp->rq_xid);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 478fa33..510ad45 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -893,12 +893,17 @@ svc_udp_sendto(struct svc_rqst *rqstp)
return error;
}
+static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
+{
+}
+
static struct svc_xprt_ops svc_udp_ops = {
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
.xpo_release_rqst = svc_release_skb,
.xpo_detach = svc_sock_detach,
.xpo_free = ...In order to avoid blocking a service thread, the receive side checks
to see if there is sufficient write space to reply to the request.
Each transport has a different mechanism for determining if there is
enough write space to reply.
The code that checked for write space was coupled with code that
checked for CLOSE and CONN. These checks have been broken out into
separate statements to make the code easier to read.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 1
net/sunrpc/svcsock.c | 82 +++++++++++++++++++++++++++------------
2 files changed, 57 insertions(+), 26 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 8501115..3adc8f3 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,7 @@
#include <linux/sunrpc/svc.h>
struct svc_xprt_ops {
+ int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
void (*xpo_prep_reply_hdr)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 510ad45..e24bf22 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -205,22 +205,6 @@ static void svc_release_skb(struct svc_rqst *rqstp)
}
/*
- * Any space to write?
- */
-static inline unsigned long
-svc_sock_wspace(struct svc_sock *svsk)
-{
- int wspace;
-
- if (svsk->sk_sock->type == SOCK_STREAM)
- wspace = sk_stream_wspace(svsk->sk_sk);
- else
- wspace = sock_wspace(svsk->sk_sk);
-
- return wspace;
-}
-
-/*
* Queue up a socket with data pending. If there are idle nfsd
* processes, wake 'em up.
*
@@ -269,22 +253,24 @@ svc_sock_enqueue(struct svc_sock *svsk)
BUG_ON(svsk->sk_pool != NULL);
svsk->sk_pool = pool;
- set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
- > svc_sock_wspace(svsk))
- && !test_bit(SK_CLOSE, ...Hi Tom- Since "required" is an int, this test can behave differently than the one it replaces. What's the test plan to ensure the new test works reasonably? -- Chuck Lever chuck[dot]lever[at]oracle[dot]com -
Here's my matrix: Connectathon iozone Kernel build V2 udp,tcp V3 tcp,rdma tcp,rdma tcp,rdma V4 tcp,rdma tcp,rdma tcp,rdma I also kill tests to test the signal race paths, and I run multiple tests concurrently (e.g. a couple iozones plus a kernel build). I guess none of these tests explicitly test the path in question -
If sk_reserved can approach half of 2^31, for example, then surely we have bigger problems? --b. -
What stops sk_reserved from going negative? -- Chuck Lever chuck[dot]lever[at]oracle[dot]com -
Nothing actively _stops_ it from going negative. Indirectly it is prevented by the "fact" that sv_max_mesg is always greater than the amount returned by any read on the socket. If this is not true, then sk_reserved can go negative. That would occur in svc_recv when the difference between the amount currently reserved (worst case sv_max_mesg) and the amount read is subtracted from the amount reserved. If the amount read were greater than sv_max_mesg then the result would be negative (both meanings intended). -
I have to admit, I'm completely and utterly lost here. Chuck, could you explain exactly, in detail, how you think this change could cause a bug? --b. -
The original code has this:
static inline unsigned long
svc_sock_wspace(struct svc_sock *svsk)
{
int wspace;
if (svsk->sk_sock->type == SOCK_STREAM)
wspace = sk_stream_wspace(svsk->sk_sk);
else
wspace = sock_wspace(svsk->sk_sk);
return wspace;
}
...
if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
> svc_sock_wspace(svsk))
Since svc_sock_wspace() returns an unsigned long, that means the sum
above is always converted to unsigned long. If sk_reserved goes
negative, then the sum becomes a large positive.
Note that in the TCP case, sk_stream_wspace() returns an int, which
is also converted to an unsigned long by svc_sock_wspace().
sk_stream_wspace() can return a negative if sk_wmem_queued happens to
become larger than sk_sndbuf.
In all the other wspace callbacks I checked, the result of
sk_stream_wspace() is compared to sk_stream_min_space() -- comparing
an int to an int -- which means if sk_stream_wspace()'s result *does*
go negative, those comparisons will catch it and do the right thing.
In the original RPC server implementation, though, sk_stream_wspace
()'s result is converted to unsigned long; negative return values
thus become large positives.
In both of these scenarios, the large positives make the comparison
behave in exactly the opposite way we might expect it to. Tom's code
fixes this by making "required" an int for the TCP case so we still
get an int-to-int comparison.
Because the original is coded the way it is, however, it's difficult
to tell whether the signedness of these variables was intended for
some clever effect, or whether it was an oversight. If this was
intentional, the new wspace check for TCP could have unintended
performance consequences.
--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com
-
My (limited) understanding is that the amount of socket output buffer space the server thinks it needs for sending a reply is subtracted from sk_reserved. So it seems like svc_reserve() would be the place to assert that sk_reserved does not go negative. As this is used mostly by svc_reserve_auth(), that suggests that a reasonable test case would be running a read/write intensive workload with some security flavor that uses large authenticators. -- Chuck Lever chuck[dot]lever[at]oracle[dot]com -
Close handling was duplicated in the UDP and TCP recvfrom
methods. This code has been moved to the transport independent
svc_recv function.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
net/sunrpc/svcsock.c | 24 ++++++++++--------------
1 files changed, 10 insertions(+), 14 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e24bf22..7373244 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -776,11 +776,6 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
return svc_deferred_recv(rqstp);
}
- if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
- svc_delete_socket(svsk);
- return 0;
- }
-
clear_bit(SK_DATA, &svsk->sk_flags);
skb = NULL;
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
@@ -1181,11 +1176,6 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
return svc_deferred_recv(rqstp);
}
- if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
- svc_delete_socket(svsk);
- return 0;
- }
-
if (svsk->sk_sk->sk_state == TCP_LISTEN) {
svc_tcp_accept(svsk);
svc_sock_received(svsk);
@@ -1575,10 +1565,16 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
}
spin_unlock_bh(&pool->sp_lock);
- dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
- rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
- len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
- dprintk("svc: got len=%d\n", len);
+ len = 0;
+ if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+ dprintk("svc_recv: found SK_CLOSE\n");
+ svc_delete_socket(svsk);
+ } else {
+ dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
+ rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
+ len = svsk->sk_xprt.xpt_ops->xpo_recvfrom(rqstp);
+ dprintk("svc: got len=%d\n", len);
+ }
/* No data, incomplete (TCP) read, or accept() */
if (len == 0 || len == -EAGAIN) {
-
Previously, the accept logic looked into the socket state to determine
whether to call accept or recv when data-ready was indicated on an endpoint.
Since some transports don't use sockets, this logic was changed to use a flag
bit (SK_LISTENER) to identify listening endpoints. A transport function
(xpo_accept) was added to allow each transport to define its own accept
processing. A transport's initialization logic is reponsible for setting the
SK_LISTENER bit. I didn't see any way to do this in transport independent
logic since the passive side of a UDP connection doesn't listen and
always recv's.
In the svc_recv function, if the SK_LISTENER bit is set, the transport
xpo_accept function is called to handle accept processing.
Note that all functions are defined even if they don't make sense
for a given transport. For example, accept doesn't mean anything for
UDP. The function is defined anyway and bug checks if called. The
UDP transport should never set the SK_LISTENER bit.
The code that poaches connections when the connection
limit is hit was moved to a subroutine to make the accept logic path
easier to follow. Since this is in the new connection path, it should
not be a performance issue.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 1
include/linux/sunrpc/svcsock.h | 1
net/sunrpc/svcsock.c | 128 +++++++++++++++++++++------------------
3 files changed, 72 insertions(+), 58 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 3adc8f3..1527ff1 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,7 @@
#include <linux/sunrpc/svc.h>
struct svc_xprt_ops {
+ struct svc_xprt *(*xpo_accept)(struct svc_xprt *);
int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
void (*xpo_prep_reply_hdr)(struct svc_rqst *);
diff --git a/include/linux/sunrpc/svcsock.h ...Nit: I'd put description of what this patch does in the present tense. It looks like the content of "buf" is unitialized here? -
This is a bug I introduced. How about if I break this into a separate patch (like you suggest above) with the appropriate __svc_print_addr -
Why did we need that svc_sock_enqueue here, and why don't we any more? (And if we don't, but we still need the set_bit, then we need to fix the comment at the top of the file claiming the svc_sock_enqueue() is always required after setting SK_CONN.) --b. -
Calling svc_sock_enqueue while holding the BUSY bit is a no-op. We can We need to call svc_sock_enqueue, but it is done indirectly through svc_sock_received -- that calls svc_sock_enqueue after clearing the BUSY -
Modify the various kernel RPC svcs to use the svc_create_xprt service.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
fs/lockd/svc.c | 17 ++++++++---------
fs/nfs/callback.c | 4 ++--
fs/nfsd/nfssvc.c | 4 ++--
include/linux/sunrpc/svcsock.h | 1 -
net/sunrpc/sunrpc_syms.c | 1 -
net/sunrpc/svcsock.c | 22 ----------------------
6 files changed, 12 insertions(+), 37 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 82e2192..8686915 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -219,13 +219,12 @@ lockd(struct svc_rqst *rqstp)
module_put_and_exit(0);
}
-
-static int find_socket(struct svc_serv *serv, int proto)
+static int find_xprt(struct svc_serv *serv, char *proto)
{
struct svc_sock *svsk;
int found = 0;
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
- if (svsk->sk_sk->sk_protocol == proto) {
+ if (strcmp(svsk->sk_xprt.xpt_class->xcl_name, proto) == 0) {
found = 1;
break;
}
@@ -243,13 +242,13 @@ static int make_socks(struct svc_serv *serv, int proto)
int err = 0;
if (proto == IPPROTO_UDP || nlm_udpport)
- if (!find_socket(serv, IPPROTO_UDP))
- err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport,
- SVC_SOCK_DEFAULTS);
+ if (!find_xprt(serv, "udp"))
+ err = svc_create_xprt(serv, "udp", nlm_udpport,
+ SVC_SOCK_DEFAULTS);
if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
- if (!find_socket(serv, IPPROTO_TCP))
- err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport,
- SVC_SOCK_DEFAULTS);
+ if (!find_xprt(serv, "tcp"))
+ err = svc_create_xprt(serv, "tcp", nlm_tcpport,
+ SVC_SOCK_DEFAULTS);
if (err >= 0) {
warned = 0;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a796be5..e27ca14 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -123,8 +123,8 @@ int nfs_callback_up(void)
if (!serv)
goto out_err;
- ret = svc_makesock(serv, ...The svc_create_xprt function is a transport independent version
of the svc_makesock function.
Since transport instance creation contains transport dependent and
independent components, add an xpo_create transport function. The
transport implementation of this function allocates the memory for the
endpoint, implements the transport dependent initialization logic, and
calls svc_xprt_init to initialize the transport independent field (svc_xprt)
in it's data structure.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 4 +++
net/sunrpc/svc_xprt.c | 37 ++++++++++++++++++++++++
net/sunrpc/svcsock.c | 59 +++++++++++++++++++++++++++++----------
3 files changed, 85 insertions(+), 15 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 1527ff1..3f4a1df 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -10,6 +10,9 @@
#include <linux/sunrpc/svc.h>
struct svc_xprt_ops {
+ struct svc_xprt *(*xpo_create)(struct svc_serv *,
+ struct sockaddr *, int,
+ int);
struct svc_xprt *(*xpo_accept)(struct svc_xprt *);
int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
@@ -36,5 +39,6 @@ struct svc_xprt {
int svc_reg_xprt_class(struct svc_xprt_class *);
int svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *);
+int svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
#endif /* SUNRPC_SVC_XPRT_H */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 92ea85b..9136da4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -93,3 +93,40 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt)
xprt->xpt_ops = xcl->xcl_ops;
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
+
+int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+ int ...This functionally trivial patch moves the sk_reserved field to the
transport independent svc_xprt structure.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 1 +
include/linux/sunrpc/svcsock.h | 2 --
net/sunrpc/svcsock.c | 10 +++++-----
3 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 21fa6ad..c9892d5 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -53,6 +53,7 @@ struct svc_xprt {
struct svc_pool *xpt_pool; /* current pool iff queued */
struct svc_serv *xpt_server; /* service for transport */
+ atomic_t xpt_reserved; /* space on outq that is rsvd */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 060508b..ba41f11 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -20,8 +20,6 @@ struct svc_sock {
struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */
- atomic_t sk_reserved; /* space on outq that is reserved */
-
spinlock_t sk_lock; /* protects sk_deferred and
* sk_info_authunix */
struct list_head sk_deferred; /* deferred requests that need to
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0541c55..954689e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -288,7 +288,7 @@ svc_sock_enqueue(struct svc_sock *svsk)
rqstp->rq_sock = svsk;
svc_xprt_get(&svsk->sk_xprt);
rqstp->rq_reserved = serv->sv_max_mesg;
- atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
+ atomic_add(rqstp->rq_reserved, &svsk->sk_xprt.xpt_reserved);
BUG_ON(svsk->sk_xprt.xpt_pool != pool);
wake_up(&rqstp->rq_wait);
} else {
@@ -353,7 +353,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
if (space < rqstp->rq_reserved) {
struct ...Move sk_list and sk_ready to svc_xprt. This involves close because these
lists are walked by svcs when closing all their transports. So I combined
the moving of these lists to svc_xprt with making close transport independent.
The svc_force_sock_close has been changed to svc_close_all and takes a list
as an argument. This removes some svc internals knowledge from the svcs.
This code races with module removal and transport addition.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
fs/lockd/svc.c | 6 +-
fs/nfsd/nfssvc.c | 4 +
include/linux/sunrpc/svc_xprt.h | 2 +
include/linux/sunrpc/svcsock.h | 4 -
net/sunrpc/svc.c | 9 +--
net/sunrpc/svc_xprt.c | 2 +
net/sunrpc/svcsock.c | 106 +++++++++++++++++++--------------------
7 files changed, 63 insertions(+), 70 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 8686915..a8e79a9 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -221,10 +221,10 @@ lockd(struct svc_rqst *rqstp)
static int find_xprt(struct svc_serv *serv, char *proto)
{
- struct svc_sock *svsk;
+ struct svc_xprt *xprt;
int found = 0;
- list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
- if (strcmp(svsk->sk_xprt.xpt_class->xcl_name, proto) == 0) {
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
+ if (strcmp(xprt->xpt_class->xcl_name, proto) == 0) {
found = 1;
break;
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index a828b0b..9647b0f 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -155,8 +155,8 @@ static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv)
{
/* When last nfsd thread exits we need to do some clean-up */
- struct svc_sock *svsk;
- list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+ struct svc_xprt *xprt;
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
lockd_down();
nfsd_serv = NULL;
...Move the sk_mutex field to the transport independent svc_xprt structure.
Now all the fields that svc_send touches are transport neutral. Change the
svc_send function to use the transport independent svc_xprt directly instead
of the transport dependent svc_sock structure.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 1 +
include/linux/sunrpc/svcsock.h | 1 -
net/sunrpc/svc_xprt.c | 1 +
net/sunrpc/svcsock.c | 19 ++++++++-----------
4 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index c9892d5..d5ef902 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -54,6 +54,7 @@ struct svc_xprt {
struct svc_pool *xpt_pool; /* current pool iff queued */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
+ struct mutex xpt_mutex; /* to serialize sending data */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index ba41f11..41c2dfa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -24,7 +24,6 @@ struct svc_sock {
* sk_info_authunix */
struct list_head sk_deferred; /* deferred requests that need to
* be revisted */
- struct mutex sk_mutex; /* to serialize sending data */
/* We keep the old state_change and data_ready CB's here */
void (*sk_ostate)(struct sock *);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index bbdada7..7544102 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -112,6 +112,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
xprt->xpt_server = serv;
INIT_LIST_HEAD(&xprt->xpt_list);
INIT_LIST_HEAD(&xprt->xpt_ready);
+ mutex_init(&xprt->xpt_mutex);
}
...This is another incremental change that moves transport independent
fields from svc_sock to the svc_xprt structure. The changes
should be functionally null.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 6 +++-
include/linux/sunrpc/svcsock.h | 2 -
net/sunrpc/svc_xprt.c | 4 ++-
net/sunrpc/svcsock.c | 57 ++++++++++++++++++---------------------
4 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index f391d21..4f7dbbc 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -48,11 +48,15 @@ struct svc_xprt {
#define XPT_OLD 9 /* used for xprt aging mark+sweep */
#define XPT_DETACHED 10 /* detached from tempsocks list */
#define XPT_LISTENER 11 /* listening endpoint */
+
+ struct svc_pool *xpt_pool; /* current pool iff queued */
+ struct svc_serv *xpt_server; /* service for transport */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
int svc_unreg_xprt_class(struct svc_xprt_class *);
-void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *);
+void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
+ struct svc_serv *);
int svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
void svc_xprt_put(struct svc_xprt *xprt);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index b8a8496..92d4cc9 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -22,8 +22,6 @@ struct svc_sock {
struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */
- struct svc_pool * sk_pool; /* current pool iff queued */
- struct svc_serv * sk_server; /* service for this socket */
atomic_t sk_reserved; /* space on outq that is reserved */
spinlock_t sk_lock; /* protects sk_deferred and
diff --git a/net/sunrpc/svc_xprt.c ...This functionally trivial change moves the transport independent sk_flags
field to the transport independent svc_xprt structure.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 12 +++
include/linux/sunrpc/svcsock.h | 13 ---
net/sunrpc/svcsock.c | 149 ++++++++++++++++++++-------------------
3 files changed, 87 insertions(+), 87 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index eb801ad..f391d21 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -36,6 +36,18 @@ struct svc_xprt {
struct svc_xprt_class *xpt_class;
struct svc_xprt_ops *xpt_ops;
struct kref xpt_ref;
+ unsigned long xpt_flags;
+#define XPT_BUSY 0 /* enqueued/receiving */
+#define XPT_CONN 1 /* conn pending */
+#define XPT_CLOSE 2 /* dead or dying */
+#define XPT_DATA 3 /* data pending */
+#define XPT_TEMP 4 /* connected transport */
+#define XPT_DEAD 6 /* transport closed */
+#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
+#define XPT_DEFERRED 8 /* deferred request pending */
+#define XPT_OLD 9 /* used for xprt aging mark+sweep */
+#define XPT_DETACHED 10 /* detached from tempsocks list */
+#define XPT_LISTENER 11 /* listening endpoint */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index ba07d50..b8a8496 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -24,19 +24,6 @@ struct svc_sock {
struct svc_pool * sk_pool; /* current pool iff queued */
struct svc_serv * sk_server; /* service for this socket */
- unsigned long sk_flags;
-#define SK_BUSY 0 /* enqueued/receiving */
-#define SK_CONN 1 /* conn pending */
-#define SK_CLOSE 2 /* dead or dying */
-#define SK_DATA 3 /* data pending */
-#define SK_TEMP 4 /* temp (TCP) socket */
-#define SK_DEAD 6 /* socket closed ...The svc_sock_enqueue function is now transport independent since all of
the fields it touches have been moved to the transport independent svc_xprt
structure. Change the function to use the svc_xprt structure directly
instead of the transport specific svc_sock structure.
Transport specific data-ready handlers need to call this function, so
export it.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
net/sunrpc/svcsock.c | 94 ++++++++++++++++++++++++++------------------------
1 files changed, 48 insertions(+), 46 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 954689e..d8df8c7 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -5,7 +5,7 @@
*
* The server scheduling algorithm does not always distribute the load
* evenly when servicing a single client. May need to modify the
- * svc_sock_enqueue procedure...
+ * svc_xprt_enqueue procedure...
*
* TCP support is largely untested and may be a little slow. The problem
* is that we currently do two separate recvfrom's, one for the 4-byte
@@ -63,7 +63,7 @@
* providing that certain rules are followed:
*
* XPT_CONN, XPT_DATA, can be set or cleared at any time.
- * after a set, svc_sock_enqueue must be called.
+ * after a set, svc_xprt_enqueue must be called.
* after a clear, the socket must be read/accepted
* if this succeeds, it must be set again.
* XPT_CLOSE can set at any time. It is never cleared.
@@ -212,22 +212,21 @@ static void svc_release_skb(struct svc_rqst *rqstp)
* processes, wake 'em up.
*
*/
-static void
-svc_sock_enqueue(struct svc_sock *svsk)
+void svc_xprt_enqueue(struct svc_xprt *xprt)
{
- struct svc_serv *serv = svsk->sk_xprt.xpt_server;
+ struct svc_serv *serv = xprt->xpt_server;
struct svc_pool *pool;
struct svc_rqst *rqstp;
int cpu;
- if (!(svsk->sk_xprt.xpt_flags &
+ if (!(xprt->xpt_flags &
((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
return;
- if ...Now that the svc_xprt_received function handles transports, the call
to svc_xprt_received in the xpo_tcp_accept function can be moved to
common code.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
net/sunrpc/svcsock.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d8d88c1..c637acf 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1103,8 +1103,6 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
memcpy(&newsvsk->sk_local, sin, slen);
- svc_xprt_received(&newsvsk->sk_xprt);
-
if (serv->sv_stats)
serv->sv_stats->nettcpconn++;
@@ -1593,6 +1591,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
*/
__module_get(newxpt->xpt_class->xcl_owner);
svc_check_conn_limits(svsk->sk_xprt.xpt_server);
+ svc_xprt_received(newxpt);
}
svc_xprt_received(&svsk->sk_xprt);
} else {
-
Change the atomic_t reference count to a kref and move it to the
transport indepenent svc_xprt structure. Change the reference count
wrapper names to be generic.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 8 ++++++
include/linux/sunrpc/svcsock.h | 1 -
net/sunrpc/svc_xprt.c | 17 ++++++++++++
net/sunrpc/svcsock.c | 54 +++++++++++++++------------------------
4 files changed, 46 insertions(+), 34 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 3f4a1df..eb801ad 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -8,6 +8,7 @@
#define SUNRPC_SVC_XPRT_H
#include <linux/sunrpc/svc.h>
+#include <linux/module.h>
struct svc_xprt_ops {
struct svc_xprt *(*xpo_create)(struct svc_serv *,
@@ -34,11 +35,18 @@ struct svc_xprt_class {
struct svc_xprt {
struct svc_xprt_class *xpt_class;
struct svc_xprt_ops *xpt_ops;
+ struct kref xpt_ref;
};
int svc_reg_xprt_class(struct svc_xprt_class *);
int svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *);
int svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
+void svc_xprt_put(struct svc_xprt *xprt);
+
+static inline void svc_xprt_get(struct svc_xprt *xprt)
+{
+ kref_get(&xprt->xpt_ref);
+}
#endif /* SUNRPC_SVC_XPRT_H */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 3181d9d..ba07d50 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -24,7 +24,6 @@ struct svc_sock {
struct svc_pool * sk_pool; /* current pool iff queued */
struct svc_serv * sk_server; /* service for this socket */
- atomic_t sk_inuse; /* use count */
unsigned long sk_flags;
#define SK_BUSY 0 /* enqueued/receiving */
#define SK_CONN 1 /* conn pending */
diff --git a/net/sunrpc/svc_xprt.c ...All this BUG_ON() is doing is checking for what looks like a bizarrely unlikely bug in kref_put(). The BUG_ON from the original code is: BUG_ON(!test_bit(SK_DEAD, &svsk->sk_flags)); But I don't have any strong feelings as to whether that's necessary. --b. -
All fields touched by svc_sock_received are now transport independent.
Change it to use svc_xprt directly. This function is called from
transport dependent code, so export it.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 2 +-
net/sunrpc/svcsock.c | 35 +++++++++++++++++------------------
2 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index d5ef902..c416d05 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -62,8 +62,8 @@ int svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
struct svc_serv *);
int svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
+void svc_xprt_received(struct svc_xprt *);
void svc_xprt_put(struct svc_xprt *xprt);
-
static inline void svc_xprt_get(struct svc_xprt *xprt)
{
kref_get(&xprt->xpt_ref);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d546709..d8d88c1 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -330,14 +330,13 @@ svc_sock_dequeue(struct svc_pool *pool)
* Note: XPT_DATA only gets cleared when a read-attempt finds
* no (or insufficient) data.
*/
-static inline void
-svc_sock_received(struct svc_sock *svsk)
+void svc_xprt_received(struct svc_xprt *xprt)
{
- svsk->sk_xprt.xpt_pool = NULL;
- clear_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
+ xprt->xpt_pool = NULL;
+ clear_bit(XPT_BUSY, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
}
-
+EXPORT_SYMBOL_GPL(svc_xprt_received);
/**
* svc_reserve - change the space reserved for the reply to a request.
@@ -766,7 +765,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
(serv->sv_nrthreads+3) * serv->sv_max_mesg);
if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
- svc_sock_received(svsk);
+ svc_xprt_received(&svsk->sk_xprt);
...Move the authinfo cache to svc_xprt. This allows both the TCP and RDMA
transports to share this logic. A flag bit is used to determine if
auth information is to be cached or not. Previously, this code looked
at the transport protocol.
I've also changed the spin_lock/unlock logic so that a lock is not taken for
transports that are not caching auth info.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc_xprt.h | 4 +++
include/linux/sunrpc/svcsock.h | 5 ----
net/sunrpc/svc_xprt.c | 4 +++
net/sunrpc/svcauth_unix.c | 54 +++++++++++++++++++++------------------
net/sunrpc/svcsock.c | 22 +++++++++-------
5 files changed, 49 insertions(+), 40 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index c416d05..dfb1d4d 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -50,11 +50,15 @@ struct svc_xprt {
#define XPT_OLD 9 /* used for xprt aging mark+sweep */
#define XPT_DETACHED 10 /* detached from tempsocks list */
#define XPT_LISTENER 11 /* listening endpoint */
+#define XPT_CACHE_AUTH 12 /* cache auth info */
struct svc_pool *xpt_pool; /* current pool iff queued */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
struct mutex xpt_mutex; /* to serialize sending data */
+ spinlock_t xpt_lock; /* protects sk_deferred
+ * and xpt_auth_cache */
+ void *xpt_auth_cache;/* auth cache */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 406d003..f2ed6a2 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -20,8 +20,6 @@ struct svc_sock {
struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */
- spinlock_t sk_lock; /* protects sk_deferred and
- * ...With the implementation of the new mark and sweep algorithm for shutting
down old connections, the sk_lastrecv field is no longer needed.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svcsock.h | 1 -
net/sunrpc/svcsock.c | 5 +----
2 files changed, 1 insertions(+), 5 deletions(-)
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 41c2dfa..406d003 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -33,7 +33,6 @@ struct svc_sock {
/* private TCP part */
int sk_reclen; /* length of record */
int sk_tcplen; /* current read length */
- time_t sk_lastrecv; /* time of last received request */
/* cache of various info for TCP sockets */
void *sk_info_authunix;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c637acf..a7af4cf 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1608,7 +1608,6 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
svc_sock_release(rqstp);
return -EAGAIN;
}
- svsk->sk_lastrecv = get_seconds();
clear_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags);
rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
@@ -1708,8 +1707,7 @@ svc_age_temp_sockets(unsigned long closure)
list_del_init(le);
svsk = list_entry(le, struct svc_sock, sk_xprt.xpt_list);
- dprintk("queuing svsk %p for closing, %lu seconds old\n",
- svsk, get_seconds() - svsk->sk_lastrecv);
+ dprintk("queuing svsk %p for closing\n", svsk);
/* a thread will dequeue and close it soon */
svc_xprt_enqueue(&svsk->sk_xprt);
@@ -1757,7 +1755,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
svsk->sk_ostate = inet->sk_state_change;
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
- svsk->sk_lastrecv = get_seconds();
spin_lock_init(&svsk->sk_lock);
INIT_LIST_HEAD(&svsk->sk_deferred);
-
This patch moves the transport independent sk_deferred list to the svc_xprt
structure and updates the svc_deferred_req structure to keep pointers to
svc_xprt's directly. The deferral processing code is also moved out of the
transport dependent recvfrom functions and into the generic svc_recv path.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc.h | 2 +
include/linux/sunrpc/svc_xprt.h | 2 +
include/linux/sunrpc/svcsock.h | 3 --
net/sunrpc/svc_xprt.c | 1 +
net/sunrpc/svcsock.c | 58 +++++++++++++++++----------------------
5 files changed, 29 insertions(+), 37 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index cfb2652..40adc9d 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -320,7 +320,7 @@ static inline void svc_free_res_pages(struct svc_rqst *rqstp)
struct svc_deferred_req {
u32 prot; /* protocol (UDP or TCP) */
- struct svc_sock *svsk;
+ struct svc_xprt *xprt;
struct sockaddr_storage addr; /* where reply must go */
size_t addrlen;
union svc_addr_u daddr; /* where reply must come from */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index dfb1d4d..d93ae27 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -59,6 +59,8 @@ struct svc_xprt {
spinlock_t xpt_lock; /* protects sk_deferred
* and xpt_auth_cache */
void *xpt_auth_cache;/* auth cache */
+ struct list_head xpt_deferred; /* deferred requests that need
+ * to be revisted */
};
int svc_reg_xprt_class(struct svc_xprt_class *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index f2ed6a2..96a229e 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -20,9 +20,6 @@ struct svc_sock {
struct socket * sk_sock; /* berkeley socket layer */
struct sock * sk_sk; /* INET layer */
- struct ...This patch moves the transport sockaddr to the svc_xprt structure. Convenience functions are added to set and get the local and remote addresses of a transport from the transport provider as well as determine the length of a sockaddr. A transport is responsible for setting the xpt_local and xpt_remote addresses in the svc_xprt structure as part of transport creation and xpo_accept processing. This cannot be done in a generic way and in fact varies between TCP, UDP and RDMA. A set of xpo_ functions (e.g. getlocalname, getremotename) could have been added but this would have resulted in additional caching and copying of the addresses around. Note that the xpt_local address should also be set on listening endpoints; for TCP/RDMA this is done as part of endpoint creation. For connected transports like TCP and RDMA, the addresses never change and can be set once and copied into the rqstp structure for each request. For UDP, however, the local and remote addresses may change for each request. In this case, the address information is obtained from the UDP recvmsg info and copied into the rqstp structure from there. A svc_xprt_local_port function was also added that returns the local port given a transport. This is used by svc_create_xprt when returning the port associated with a newly created transport, and later when creating a generic find transport service to check if a service is already listening on a given port. Signed-off-by: Tom Tucker <tom@opengridcomputing.com> --- include/linux/sunrpc/svc_xprt.h | 51 ++++++++++++++++++++++++++++++++++++ include/linux/sunrpc/svcsock.h | 4 --- net/sunrpc/svc_xprt.c | 31 ++++++++++++++++++++-- net/sunrpc/svcsock.c | 56 +++++++++++++++++++++------------------ 4 files changed, 110 insertions(+), 32 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index d93ae27..a1d324a 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ ...
This function is transport independent. Change it to use svc_xprt directly
and change it's name to reflect this.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
net/sunrpc/svcsock.c | 37 +++++++++++++++++++------------------
1 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a6793b5..27cf09f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1658,49 +1658,50 @@ svc_send(struct svc_rqst *rqstp)
* Timer function to close old temporary sockets, using
* a mark-and-sweep algorithm.
*/
-static void
-svc_age_temp_sockets(unsigned long closure)
+static void svc_age_temp_xprts(unsigned long closure)
{
struct svc_serv *serv = (struct svc_serv *)closure;
- struct svc_sock *svsk;
+ struct svc_xprt *xprt;
struct list_head *le, *next;
LIST_HEAD(to_be_aged);
- dprintk("svc_age_temp_sockets\n");
+ dprintk("svc_age_temp_xprts\n");
if (!spin_trylock_bh(&serv->sv_lock)) {
/* busy, try again 1 sec later */
- dprintk("svc_age_temp_sockets: busy\n");
+ dprintk("svc_age_temp_xprts: busy\n");
mod_timer(&serv->sv_temptimer, jiffies + HZ);
return;
}
list_for_each_safe(le, next, &serv->sv_tempsocks) {
- svsk = list_entry(le, struct svc_sock, sk_xprt.xpt_list);
+ xprt = list_entry(le, struct svc_xprt, xpt_list);
- if (!test_and_set_bit(XPT_OLD, &svsk->sk_xprt.xpt_flags))
+ /* First time through, just mark it OLD. Second time
+ * through, close it. */
+ if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
continue;
- if (atomic_read(&svsk->sk_xprt.xpt_ref.refcount) > 1
- || test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags))
+ if (atomic_read(&xprt->xpt_ref.refcount) > 1
+ || test_bit(XPT_BUSY, &xprt->xpt_flags))
continue;
- svc_xprt_get(&svsk->sk_xprt);
+ svc_xprt_get(xprt);
list_move(le, &to_be_aged);
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- set_bit(XPT_DETACHED, &svsk->sk_xprt.xpt_flags);
+ set_bit(XPT_CLOSE, ...Move the code that adds a transport instance to the sv_tempsocks and
sv_permsocks lists out of the transport specific functions and into core
logic.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
net/sunrpc/svc_xprt.c | 9 ++++++++-
net/sunrpc/svcsock.c | 39 +++++++++++++++++++--------------------
2 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d0cbfe0..924df63 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -145,8 +145,15 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
ret = PTR_ERR(newxprt);
- } else
+ } else {
+ clear_bit(XPT_TEMP,
+ &newxprt->xpt_flags);
+ spin_lock_bh(&serv->sv_lock);
+ list_add(&newxprt->xpt_list,
+ &serv->sv_permsocks);
+ spin_unlock_bh(&serv->sv_lock);
ret = svc_xprt_local_port(newxprt);
+ }
}
goto out;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 27cf09f..f284bdf 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -94,6 +94,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
struct sockaddr *, int, int);
+static void svc_age_temp_xprts(unsigned long closure);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
@@ -1574,7 +1575,20 @@ svc_recv(struct svc_rqst *rqstp, long timeout)
* listener holds a reference too
*/
__module_get(newxpt->xpt_class->xcl_owner);
- svc_check_conn_limits(svsk->sk_xprt.xpt_server);
+ svc_check_conn_limits(xprt->xpt_server);
+ spin_lock_bh(&serv->sv_lock);
+ set_bit(XPT_TEMP, &newxpt->xpt_flags);
+ list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
+ serv->sv_tmpcnt++;
+ if ...The nesting here is getting kind of deep.
A bit more idiomatic for kernel code would be (untested) something like:
list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
struct svc_xprt *newxprt;
if (strcmp(xprt_name, xcl->xcl_name) != 0)
continue;
spin_unlock(&svc_xprt_class_lock);
if (!try_module_get(xcl->xcl_owner))
goto out;
# (assuming that's the right order)
newxprt = xcl->xcl_ops->xpo_create (serv,
(struct sockaddr *)&sin, sizeof(sin), flags);
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
ret = PTR_ERR(newxprt);
goto out;
}
clear_bit(XPT_TEMP, &newxprt->xpt_flags);
spin_lock_bh(&serv->sv_lock);
list_add(&newxprt->xpt_list, &serv->sv_permsocks);
spin_unlock_bh(&serv->sv_lock);
ret = svc_xprt_local_port(newxprt);
goto out;
}
At least to my eyes, that also makes it a lot easier to see what the
important (succesful) case is, since that's the code that stays
unindented all the way to the end.
I'd also be inclined to replace those "got out"'s by "returns". I don't
really see the point of the label when it's leads to something that's
literally just a return.
--b.
-
Some transports have a header in front of the RPC header. The current
defer/revisit processing considers only the iov_len and arg_len to
determine how much to back up when saving the original request
to revisit. Add a field to the rqstp structure to save the size
of the transport header so svc_defer can correctly compute
the start of a request.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc.h | 2 ++
net/sunrpc/svc_xprt.c | 36 +++++++++++++++++++++++++++---------
net/sunrpc/svcsock.c | 2 ++
3 files changed, 31 insertions(+), 9 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04eb20e..742ab46 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -217,6 +217,7 @@ struct svc_rqst {
void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
+ size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
struct xdr_buf rq_res;
struct page * rq_pages[RPCSVC_MAXPAGES];
@@ -322,6 +323,7 @@ struct svc_deferred_req {
size_t addrlen;
union svc_addr_u daddr; /* where reply must come from */
struct cache_deferred_req handle;
+ size_t xprt_hlen;
int argslen;
__be32 args[0];
};
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index a477883..fdaac77 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -29,7 +29,6 @@
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
@@ -844,10 +843,18 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
svc_xprt_put(xprt);
}
+/*
+ * Save the request off for later processing. The request buffer looks
+ * like this:
+ *
+ * <xprt-header><rpc-header><rpc-pagelist><rpc-tail>
+ *
+ * This code can only ...This functionally empty patch removes rq_sock and unamed union
from rqstp structure.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
include/linux/sunrpc/svc.h | 5 +----
net/sunrpc/svcsock.c | 38 ++++++++++++++++++++++++--------------
2 files changed, 25 insertions(+), 18 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 40adc9d..04eb20e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -204,10 +204,7 @@ union svc_addr_u {
struct svc_rqst {
struct list_head rq_list; /* idle list */
struct list_head rq_all; /* all threads list */
- union {
- struct svc_xprt * rq_xprt; /* transport ptr */
- struct svc_sock * rq_sock; /* socket ptr */
- };
+ struct svc_xprt * rq_xprt; /* transport ptr */
struct sockaddr_storage rq_addr; /* peer address */
size_t rq_addrlen;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f284bdf..a785055 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -201,10 +201,12 @@ static void svc_release_skb(struct svc_rqst *rqstp)
struct svc_deferred_req *dr = rqstp->rq_deferred;
if (skb) {
+ struct svc_sock *svsk =
+ container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
rqstp->rq_xprt_ctxt = NULL;
dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
- skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
+ skb_free_datagram(svsk->sk_sk, skb);
}
if (dr) {
rqstp->rq_deferred = NULL;
@@ -415,7 +417,7 @@ svc_wake_up(struct svc_serv *serv)
dprintk("svc: daemon %p woken up.\n", rqstp);
/*
svc_thread_dequeue(pool, rqstp);
- rqstp->rq_sock = NULL;
+ rqstp->rq_xprt = NULL;
*/
wake_up(&rqstp->rq_wait);
}
@@ -432,7 +434,9 @@ union svc_pktinfo_u {
static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
{
- switch (rqstp->rq_sock->sk_sk->sk_family) {
+ struct svc_sock *svsk =
+ container_of(rqstp->rq_xprt, struct svc_sock, ...I know I asked before, and can't remember what happened: has this been
tested with krb5p? (I know nobody cares much whether krb5p/rdma works,
I just want to make sure krb5p/rdma doesn't oops, and that
krb5p/{tcp,udp} are unchanged.)
I'll try it eventually if you don't....
--b.
-
I haven't tested this combination. I definitely don't have a kerberos setup, -
Update the write handler for the portlist file to allow creating new
listening endpoints on a transport. The general form of the string is:
<transport_name><space><port number>
For example:
echo "tcp 2049" > /proc/fs/nfsd/portlist
This is intended to support the creation of a listening endpoint for
RDMA transports without adding #ifdef code to the nfssvc.c file.
Transports can also be removed as follows:
'-'<transport_name><space><port number>
For example:
echo "-tcp 2049" > /proc/fs/nfsd/portlist
Attempting to add a listener with an invalid transport string results
in EPROTONOSUPPORT and a perror string of "Protocol not supported".
Attempting to remove an non-existent listener (.e.g. bad proto or port)
results in ENOTCONN and a perror string of
"Transport endpoint is not connected"
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
fs/nfsd/nfsctl.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++-
net/sunrpc/svc_xprt.c | 1 +
2 files changed, 52 insertions(+), 1 deletions(-)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 77dc989..5b9ed0e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -540,7 +540,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
}
return err < 0 ? err : 0;
}
- if (buf[0] == '-') {
+ if (buf[0] == '-' && isdigit(buf[1])) {
char *toclose = kstrdup(buf+1, GFP_KERNEL);
int len = 0;
if (!toclose)
@@ -554,6 +554,56 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
kfree(toclose);
return len;
}
+ /*
+ * Add a transport listener by writing it's transport name
+ */
+ if (isalpha(buf[0])) {
+ int err;
+ char transport[16];
+ int port;
+ if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
+ err = nfsd_create_serv();
+ if (!err) {
+ if (svc_find_xprt(nfsd_serv, transport,
+ AF_UNSPEC, port))
+ return -EADDRINUSE;
+
+ err = svc_create_xprt(nfsd_serv,
+ transport, ...Create a transport independent version of the svc_sock_names function.
The toclose capability of the svc_sock_names service can be implemented
using the svc_xprt_find and svc_xprt_close services.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
fs/nfsd/nfsctl.c | 2 +-
include/linux/sunrpc/svc_xprt.h | 1 +
net/sunrpc/svc_xprt.c | 35 +++++++++++++++++++++++++++++++++++
3 files changed, 37 insertions(+), 1 deletions(-)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5b9ed0e..86d084e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -503,7 +503,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0;
lock_kernel();
if (nfsd_serv)
- len = svc_sock_names(buf, nfsd_serv, NULL);
+ len = svc_xprt_names(nfsd_serv, buf, 0);
unlock_kernel();
return len;
}
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 840b5c4..ed9791a 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -81,6 +81,7 @@ void svc_delete_xprt(struct svc_xprt *xprt);
int svc_port_is_privileged(struct sockaddr *sin);
int svc_print_xprts(char *buf, int maxlen);
struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int);
+int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen);
static inline void svc_xprt_get(struct svc_xprt *xprt)
{
kref_get(&xprt->xpt_ref);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 6708aa2..2e3a672 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -998,3 +998,38 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
return found;
}
EXPORT_SYMBOL_GPL(svc_find_xprt);
+
+/*
+ * Format a buffer with a list of the active transports. A zero for
+ * the buflen parameter disables target buffer overflow checking.
+ */
+int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
+{
+ struct svc_xprt *xprt;
+ char ...Should we delete the toclose checks from svc_sock_names(), then, under the assumption it's always called with toclose non-NULL now? And why can't we just completely replace svc_sock_names() at this point? -
The svc_check_conn_limits function only manipulates xprt fields. Change references
to svc_sock->sk_xprt to svc_xprt directly.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
---
net/sunrpc/svcsock.c | 28 ++++++++++++++--------------
1 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a785055..8578936 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1447,34 +1447,34 @@ static void svc_check_conn_limits(struct svc_serv *serv)
char buf[RPC_MAX_ADDRBUFLEN];
if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
- struct svc_sock *svsk = NULL;
+ struct svc_xprt *xprt = NULL;
spin_lock_bh(&serv->sv_lock);
if (!list_empty(&serv->sv_tempsocks)) {
if (net_ratelimit()) {
/* Try to help the admin */
- printk(KERN_NOTICE "%s: too many open TCP "
- "sockets, consider increasing the "
+ printk(KERN_NOTICE "%s: too many open "
+ "connections, consider increasing the "
"number of nfsd threads\n",
- serv->sv_name);
+ serv->sv_name);
printk(KERN_NOTICE
- "%s: last TCP connect from %s\n",
+ "%s: last connection from %s\n",
serv->sv_name, buf);
}
/*
- * Always select the oldest socket. It's not fair,
+ * Always select the oldest connection. It's not fair,
* but so is life
*/
- svsk = list_entry(serv->sv_tempsocks.prev,
- struct svc_sock,
- sk_xprt.xpt_list);
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- svc_xprt_get(&svsk->sk_xprt);
+ xprt = list_entry(serv->sv_tempsocks.prev,
+ struct svc_xprt,
+ xpt_list);
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_get(xprt);
}
spin_unlock_bh(&serv->sv_lock);
- if (svsk) {
- svc_xprt_enqueue(&svsk->sk_xprt);
- svc_xprt_put(&svsk->sk_xprt);
+ if (xprt) {
+ svc_xprt_enqueue(xprt);
+ svc_xprt_put(xprt);
}
}
}
-
