Prepare nfs/sunrpc stack to use multiple instances of rpc_pipefs.
Only for client for now.
Changelog:
v2:
- one of rpc_create() calls was missed initially, fixed;
- change logic for get_rpc_pipefs(NULL);
- export get_rpc_pipefs() to be able to use from modules
(thanks to J. Bruce Fields);
- change "From:" and "Signed-off-by:" addresses.
v1:
- initial revision of the patchset.
Kirill A. Shutemov (12):
sunrpc: mount rpc_pipefs on initialization
sunrpc: introduce init_rpc_pipefs
sunrpc: push init_rpc_pipefs up to rpc_create() callers
sunrpc: tag svc_serv with rpc_pipefs mount point
sunrpc: get rpc_pipefs mount point for svc_serv from callers
lockd: get rpc_pipefs mount point from callers
sunrpc: get rpc_pipefs mount point for rpcb_create[_local] from callers
sunrpc: tag pipefs field of cache_detail with rpc_pipefs mount point
nfs: per-rpc_pipefs dns cache
sunrpc: introduce get_rpc_pipefs()
nfs: introduce mount option 'rpcmount'
sunrpc: make rpc_pipefs be mountable multiple times
fs/lockd/clntlock.c | 8 +-
fs/lockd/host.c | 12 +++-
fs/lockd/mon.c | 13 ++-
fs/lockd/svc.c | 4 +-
fs/nfs/cache_lib.c | 18 +---
fs/nfs/cache_lib.h | 3 +-
fs/nfs/callback.c | 6 +-
fs/nfs/callback.h | 3 +-
fs/nfs/client.c | 45 +++++++++--
fs/nfs/dns_resolve.c | 128 +++++++++++++++++++++++------
fs/nfs/dns_resolve.h | 8 +--
fs/nfs/inode.c | 8 +--
fs/nfs/internal.h | 10 ++-
fs/nfs/mount_clnt.c | 1 +
fs/nfs/namespace.c | 3 +-
fs/nfs/nfs4namespace.c | 20 +++--
fs/nfs/super.c | 20 +++++
fs/nfsd/nfs4callback.c | 2 +
fs/nfsd/nfssvc.c | 8 +-
include/linux/lockd/bind.h | 3 ...Mount rpc_pipefs on register_rpc_pipefs() and replace
rpc_get_mount()/rpc_put_mount() implementation with mntget()/mntput().
Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
net/sunrpc/rpc_pipe.c | 27 ++++++++++++++++-----------
1 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 10a17a3..7f3fbdd 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -29,7 +29,6 @@
#include <linux/sunrpc/cache.h>
static struct vfsmount *rpc_mnt __read_mostly;
-static int rpc_mount_count;
static struct file_system_type rpc_pipe_fs_type;
@@ -415,18 +414,13 @@ struct rpc_filelist {
struct vfsmount *rpc_get_mount(void)
{
- int err;
-
- err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count);
- if (err != 0)
- return ERR_PTR(err);
- return rpc_mnt;
+ return mntget(rpc_mnt);
}
EXPORT_SYMBOL_GPL(rpc_get_mount);
void rpc_put_mount(void)
{
- simple_release_fs(&rpc_mnt, &rpc_mount_count);
+ mntput(rpc_mnt);
}
EXPORT_SYMBOL_GPL(rpc_put_mount);
@@ -1063,16 +1057,27 @@ int register_rpc_pipefs(void)
if (!rpc_inode_cachep)
return -ENOMEM;
err = register_filesystem(&rpc_pipe_fs_type);
- if (err) {
- kmem_cache_destroy(rpc_inode_cachep);
- return err;
+ if (err)
+ goto destroy_cache;
+
+ rpc_mnt = kern_mount(&rpc_pipe_fs_type);
+ if (IS_ERR(rpc_mnt)) {
+ err = PTR_ERR(rpc_mnt);
+ goto unregister_fs;
}
return 0;
+
+unregister_fs:
+ unregister_filesystem(&rpc_pipe_fs_type);
+destroy_cache:
+ kmem_cache_destroy(rpc_inode_cachep);
+ return err;
}
void unregister_rpc_pipefs(void)
{
+ mntput(rpc_mnt);
kmem_cache_destroy(rpc_inode_cachep);
unregister_filesystem(&rpc_pipe_fs_type);
}
--
1.7.3.4
--
Get rpc_pipefs mount point by path.
Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
include/linux/sunrpc/rpc_pipe_fs.h | 2 +
net/sunrpc/rpc_pipe.c | 38 ++++++++++++++++++++++++++++++++++++
2 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index b09bfa5..922057c 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -46,6 +46,8 @@ RPC_I(struct inode *inode)
extern struct vfsmount *init_rpc_pipefs;
+struct vfsmount *get_rpc_pipefs(const char *path);
+
extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
struct rpc_clnt;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index b1e299b..fec6b2d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -16,6 +16,7 @@
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/kernel.h>
+#include <linux/nsproxy.h>
#include <asm/ioctls.h>
#include <linux/fs.h>
@@ -931,6 +932,43 @@ static const struct super_operations s_ops = {
#define RPCAUTH_GSSMAGIC 0x67596969
+struct vfsmount *get_rpc_pipefs(const char *p)
+{
+ int error;
+ struct vfsmount *rpcmount;
+ struct path path;
+
+ if (!p) {
+ /* Try to get with default rpcmount mount point */
+ rpcmount = get_rpc_pipefs("/var/lib/nfs/rpc_pipefs");
+
+ /*
+ * If nothing was found at default mount point and init's
+ * mount namespace is in use, use init_rpc_pipefs
+ */
+ if (IS_ERR(rpcmount) && (current->nsproxy->mnt_ns ==
+ init_task.nsproxy->mnt_ns))
+ return mntget(init_rpc_pipefs);
+
+ return rpcmount;
+ }
+
+ error = kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
+ if (error)
+ return ERR_PTR(error);
+
+ if (path.mnt->mnt_sb->s_magic != RPCAUTH_GSSMAGIC) {
+ path_put(&path);
+ return ERR_PTR(-EINVAL);
+ }
+
+ rpcmount = mntget(path.mnt);
+ path_put(&path);
+
+ return ...Lazy initialization of dns cache: on first call nfs_dns_resolve_name().
Every rpc_pipefs has separate dns cache now.
Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
fs/nfs/cache_lib.c | 17 ++-----
fs/nfs/cache_lib.h | 3 +-
fs/nfs/dns_resolve.c | 128 ++++++++++++++++++++++++++++++++++++++----------
fs/nfs/dns_resolve.h | 8 +---
fs/nfs/inode.c | 8 +---
fs/nfs/nfs4namespace.c | 4 +-
6 files changed, 113 insertions(+), 55 deletions(-)
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index 0944d4e..9b99d9e 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -12,7 +12,6 @@
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/sunrpc/cache.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
#include "cache_lib.h"
@@ -111,25 +110,17 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
return 0;
}
-int nfs_cache_register(struct cache_detail *cd)
+int nfs_cache_register(struct cache_detail *cd, struct vfsmount *rpcmount)
{
struct nameidata nd;
- struct vfsmount *mnt;
int ret;
- mnt = mntget(init_rpc_pipefs);
- if (IS_ERR(mnt))
- return PTR_ERR(mnt);
- ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd);
+ ret = vfs_path_lookup(rpcmount->mnt_root, rpcmount, "/cache", 0, &nd);
if (ret)
- goto err;
- ret = sunrpc_cache_register_pipefs(mnt, nd.path.dentry,
+ return ret;
+ ret = sunrpc_cache_register_pipefs(rpcmount, nd.path.dentry,
cd->name, 0600, cd);
path_put(&nd.path);
- if (!ret)
- return ret;
-err:
- mntput(mnt);
return ret;
}
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
index 76f856e..1d4a0a5 100644
--- a/fs/nfs/cache_lib.h
+++ b/fs/nfs/cache_lib.h
@@ -23,5 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
-extern int nfs_cache_register(struct cache_detail ...To support containers, allow multiple independent instances of
rpc_pipefs. Use '-o newinstance' to create a new instance of the filesystem.
The same semantics as with devpts.
Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
net/sunrpc/rpc_pipe.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 79 insertions(+), 1 deletions(-)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fec6b2d..7b693db 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -17,6 +17,7 @@
#include <linux/fsnotify.h>
#include <linux/kernel.h>
#include <linux/nsproxy.h>
+#include <linux/parser.h>
#include <asm/ioctls.h>
#include <linux/fs.h>
@@ -39,6 +40,49 @@ static struct kmem_cache *rpc_inode_cachep __read_mostly;
#define RPC_UPCALL_TIMEOUT (30*HZ)
+struct rpc_mount_opts {
+ int newinstance;
+};
+
+enum {
+ Opt_newinstance,
+
+ Opt_err
+};
+
+static const match_table_t tokens = {
+ {Opt_newinstance, "newinstance"},
+
+ {Opt_err, NULL}
+};
+
+static int
+parse_mount_options(char *data, struct rpc_mount_opts *opts)
+{
+ char *p;
+
+ opts->newinstance = 0;
+
+ while ((p = strsep(&data, ",")) != NULL) {
+ substring_t args[MAX_OPT_ARGS];
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_newinstance:
+ opts->newinstance = 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
void (*destroy_msg)(struct rpc_pipe_msg *), int err)
{
@@ -1039,11 +1083,45 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
+static int
+compare_rpc_mnt_sb(struct super_block *s, void *p)
+{
+ if (init_rpc_pipefs)
+ return init_rpc_pipefs->mnt_sb == s;
+ return 0;
+}
+
static struct dentry *
rpc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_single(fs_type, flags, ...Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
include/linux/sunrpc/clnt.h | 4 ++--
net/sunrpc/rpcb_clnt.c | 22 ++++++++++++----------
net/sunrpc/svc.c | 34 +++++++++++++++++++++-------------
3 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f052712..59eda38 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -135,10 +135,10 @@ void rpc_shutdown_client(struct rpc_clnt *);
void rpc_release_client(struct rpc_clnt *);
void rpc_task_release_client(struct rpc_task *);
-int rpcb_register(u32, u32, int, unsigned short);
+int rpcb_register(u32, u32, int, unsigned short, struct vfsmount *);
int rpcb_v4_register(const u32 program, const u32 version,
const struct sockaddr *address,
- const char *netid);
+ const char *netid, struct vfsmount *rpcmount);
void rpcb_getport_async(struct rpc_task *);
void rpc_call_start(struct rpc_task *);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 8d04380..867d177 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -27,7 +27,6 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xprtsock.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_BIND
@@ -175,7 +174,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex);
* Returns zero on success, otherwise a negative errno value
* is returned.
*/
-static int rpcb_create_local(void)
+static int rpcb_create_local(struct vfsmount *rpcmount)
{
struct rpc_create_args args = {
.net = &init_net,
@@ -187,7 +186,7 @@ static int rpcb_create_local(void)
.version = RPCBVERS_2,
.authflavor = RPC_AUTH_UNIX,
.flags = RPC_CLNT_CREATE_NOPING,
- .rpcmount = init_rpc_pipefs,
+ .rpcmount = rpcmount,
};
struct rpc_clnt *clnt, *clnt4;
int result = 0;
@@ -229,7 +228,8 @@ out:
}
static struct ...Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
fs/nfs/cache_lib.c | 3 +--
include/linux/sunrpc/cache.h | 9 +++------
net/sunrpc/cache.c | 16 ++++++++++------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index dd7ca5f..0944d4e 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -123,7 +123,7 @@ int nfs_cache_register(struct cache_detail *cd)
ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd);
if (ret)
goto err;
- ret = sunrpc_cache_register_pipefs(nd.path.dentry,
+ ret = sunrpc_cache_register_pipefs(mnt, nd.path.dentry,
cd->name, 0600, cd);
path_put(&nd.path);
if (!ret)
@@ -136,6 +136,5 @@ err:
void nfs_cache_unregister(struct cache_detail *cd)
{
sunrpc_cache_unregister_pipefs(cd);
- mntput(init_rpc_pipefs);
}
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 6950c98..d34a621 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -64,10 +64,6 @@ struct cache_detail_procfs {
struct proc_dir_entry *flush_ent, *channel_ent, *content_ent;
};
-struct cache_detail_pipefs {
- struct dentry *dir;
-};
-
struct cache_detail {
struct module * owner;
int hash_size;
@@ -114,7 +110,7 @@ struct cache_detail {
union {
struct cache_detail_procfs procfs;
- struct cache_detail_pipefs pipefs;
+ struct path pipefs;
} u;
};
@@ -201,7 +197,8 @@ extern int cache_register_net(struct cache_detail *cd, struct net *net);
extern void cache_unregister(struct cache_detail *cd);
extern void cache_unregister_net(struct cache_detail *cd, struct net *net);
-extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
+extern int sunrpc_cache_register_pipefs(struct vfsmount *rpcmount,
+ struct dentry *parent, const char *,
mode_t, struct cache_detail *);
extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
diff --git ...It specifies rpc_pipefs to use. /var/lib/nfs/rpc_pipefs, by default.
Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
fs/nfs/callback.c | 6 ++--
fs/nfs/callback.h | 3 +-
fs/nfs/client.c | 46 ++++++++++++++++++++++++++++++++++++--------
fs/nfs/internal.h | 10 +++++++-
fs/nfs/mount_clnt.c | 3 +-
fs/nfs/namespace.c | 3 +-
fs/nfs/nfs4namespace.c | 22 +++++++++++---------
fs/nfs/super.c | 20 +++++++++++++++++++
include/linux/nfs_fs_sb.h | 1 +
9 files changed, 86 insertions(+), 28 deletions(-)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index bef6abd..ef6d206 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,7 +16,6 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sunrpc/svcauth_gss.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
#if defined(CONFIG_NFS_V4_1)
#include <linux/sunrpc/bc_xprt.h>
#endif
@@ -239,7 +238,8 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
/*
* Bring up the callback thread if it is not already up.
*/
-int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
+int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt,
+ struct vfsmount *rpcmount)
{
struct svc_serv *serv = NULL;
struct svc_rqst *rqstp;
@@ -254,7 +254,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
nfs_callback_bc_serv(minorversion, xprt, cb_info);
goto out;
}
- serv = svc_create(&nfs4_callback_program, init_rpc_pipefs,
+ serv = svc_create(&nfs4_callback_program, rpcmount,
NFS4_CALLBACK_BUFSIZE, NULL);
if (!serv) {
ret = -ENOMEM;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 85a7cfd..ae27385 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -133,7 +133,8 @@ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getat
extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
#ifdef ...Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
fs/lockd/clntlock.c | 8 +++++---
fs/lockd/host.c | 14 +++++++++++---
fs/lockd/mon.c | 15 ++++++++-------
fs/lockd/svc.c | 6 ++----
fs/nfs/client.c | 1 +
fs/nfsd/nfssvc.c | 2 +-
include/linux/lockd/bind.h | 3 ++-
include/linux/lockd/lockd.h | 4 +++-
8 files changed, 33 insertions(+), 20 deletions(-)
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 25509eb..1179c18 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -56,13 +56,14 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4;
int status;
- status = lockd_up();
+ status = lockd_up(nlm_init->rpcmount);
if (status < 0)
return ERR_PTR(status);
host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
nlm_init->protocol, nlm_version,
- nlm_init->hostname, nlm_init->noresvport);
+ nlm_init->hostname, nlm_init->noresvport,
+ nlm_init->rpcmount);
if (host == NULL) {
lockd_down();
return ERR_PTR(-ENOLCK);
@@ -223,7 +224,8 @@ reclaimer(void *ptr)
allow_signal(SIGKILL);
down_write(&host->h_rwsem);
- lockd_up(); /* note: this cannot fail as lockd is already running */
+ /* note: this cannot fail as lockd is already running */
+ lockd_up(host->h_rpcmount);
dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index b033a2d..757d1d3 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -14,9 +14,9 @@
#include <linux/in6.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/lockd/lockd.h>
#include <linux/mutex.h>
+#include <linux/mount.h>
#include <net/ipv6.h>
@@ -44,6 +44,7 @@ struct nlm_lookup_host_info {
const struct sockaddr *src_sap; /* our address ...Signed-off-by: Kirill A. Shutemov <kas@openvz.org>
---
fs/lockd/svc.c | 4 +++-
fs/nfs/callback.c | 4 +++-
fs/nfsd/nfssvc.c | 6 ++++--
include/linux/sunrpc/svc.h | 8 ++++----
net/sunrpc/svc.c | 18 +++++++++---------
5 files changed, 23 insertions(+), 17 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index abfff9d..32310b1 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -31,6 +31,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
#include <net/ip.h>
#include <linux/lockd/lockd.h>
#include <linux/nfs.h>
@@ -269,7 +270,8 @@ int lockd_up(void)
"lockd_up: no pid, %d users??\n", nlmsvc_users);
error = -ENOMEM;
- serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
+ serv = svc_create(&nlmsvc_program, init_rpc_pipefs, LOCKD_BUFSIZE,
+ NULL);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
goto out;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 93a8b3b..bef6abd 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,6 +16,7 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
#if defined(CONFIG_NFS_V4_1)
#include <linux/sunrpc/bc_xprt.h>
#endif
@@ -253,7 +254,8 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
nfs_callback_bc_serv(minorversion, xprt, cb_info);
goto out;
}
- serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
+ serv = svc_create(&nfs4_callback_program, init_rpc_pipefs,
+ NFS4_CALLBACK_BUFSIZE, NULL);
if (!serv) {
ret = -ENOMEM;
goto out_err;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2bae1d8..d96c32b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -13,6 +13,7 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
+#include ...Signed-off-by: Kirill A. Shutemov <kas@openvz.org> --- fs/lockd/host.c | 2 ++ fs/lockd/mon.c | 2 ++ fs/nfs/client.c | 2 ++ fs/nfs/mount_clnt.c | 2 ++ fs/nfsd/nfs4callback.c | 2 ++ include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 11 +++++++---- net/sunrpc/rpcb_clnt.c | 3 +++ 8 files changed, 21 insertions(+), 4 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index ed0c59f..b033a2d 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -14,6 +14,7 @@ #include <linux/in6.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/lockd/lockd.h> #include <linux/mutex.h> @@ -360,6 +361,7 @@ nlm_bind_host(struct nlm_host *host) .authflavor = RPC_AUTH_UNIX, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_AUTOBIND), + .rpcmount = init_rpc_pipefs, }; /* diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e0c9189..37e5328 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -15,6 +15,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/svc.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/lockd/lockd.h> #include <asm/unaligned.h> @@ -78,6 +79,7 @@ static struct rpc_clnt *nsm_create(void) .version = NSM_VERSION, .authflavor = RPC_AUTH_NULL, .flags = RPC_CLNT_CREATE_NOPING, + .rpcmount = init_rpc_pipefs, }; return rpc_create(&args); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0870d0d..e041f39 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -25,6 +25,7 @@ #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/xprtrdma.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs4_mount.h> @@ -614,6 +615,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, ...
What would a test case for this look like? (Is there some way to tell an nfs mount to use a specific instance of rpc_pipefs or something?) Rob --
You can create a new instance of rpc_pipefs using the 'newinstance' mount option. Then you can specify which rpc_pipefs to use with the 'rpcmount' mount option of the nfs mount. If none is specified, '/var/lib/nfs/rpc_pipefs' is used by default. If there is no 'rpcmount' mount option, no rpc_pipefs was found at '/var/lib/nfs/rpc_pipefs', and we are in init's mount namespace, we use init_rpc_pipefs. -- Kirill A. Shutemov --
It's the "we are in init's mount namespace" that I was wondering about. So if I naively chroot, nfs mount stops working the way it did before I chrooted unless I do an extra setup step? I'm actually poking at getting nfs mount working in LXC containers with different network routing (mostly study so far, it took me a couple weeks just to get lxc to work for me and now I'm trying to wrap my head around Linux's NFS implementation), so I'm very interested in this... Rob --
No. It will work as before since you are still in init's mount namespace. -- Kirill A. Shutemov --
Yep. It's bad, but there is a way to override the default. The other way is to leave the 'rpcmount' mount option without a default: get_rpc_pipefs(NULL) in init's mount namespace will always return init_rpc_pipefs, without a filesystem lookup, and get_rpc_pipefs(NULL) in a non-init mount namespace will always return an error. So you will have to specify the 'rpcmount' mount option for every nfs mount in a container. Hmm, I guess it may confuse the user. What should we do if we have several rpc_pipefs mounts in the namespace? -- Kirill A. Shutemov --
/proc/sys/kernel/hotplug exists to tell the kernel where to find the hotplug binary. Once upon a time /sys/hotplug was the default value, and that was there to overwrite it. (They changed the default to blank (disabled) not due to policy reasons, but due to adding the netlink hotplug notification mechanism and making that the default.) I bring that up to point out that the general consensus about policy in the kernel seems to be "when you really really can't avoid having any, make a sane default the user can override". (Of course adding another entry to the crawling horror of /proc may not be an improvement. But individual overrides at the mount -o level seem You mean more than one inside a given process's view of the filesystem, taking into account chroot like /proc/mounts does? Before this patch series, there was one instance systemwide. The patch changed that to look a fixed location in the filesystem relative to the current chroot. Either way, there was one instance available to a given process doing an nfs mount. What's the use case for having more than one visible to a given process? (NUMA scalability? Some sort of multipath/VPN routing context?) Rob --
It's not so obvious to me why we should restrict it. ;) Currently, there is no association between rpc_pipefs and a mount namespace, so I don't see a simple way to restrict the number of rpc_pipefs instances per mount namespace. Associating a mount namespace with rpc_pipefs is not a good idea, I think. -- Kirill A. Shutemov --
I was pointing out it's been done before. I'd prefer autodetecting it so new namespaces and the base namespace don't have magic policy _or_ require different mount invocations. An ability to change the default for a value is less appealing than not needing the value in the first place. And changing the default would probably have to be per-container anyway to be useful. (Which isn't _quite_ the same as per-namespace since you can chroot without CLONE_NEWNS.) (I keep thinking back to web service providers offering cheap web hosting "with root access" via openvz containers and such. They're administering their own boxes, but aren't big iron guys. This is yet another thing for them to understand that didn't apply to the linux box they have at home, and I'm just wondering if there's a way they don't You can still provide a specific location with "-o rpcmount=/blah", correct? So this isn't restricting it, this is autodetecting the There is in that the root context doesn't need to have this mounted, and new namespaces do. So there's an existing association between a LACK of a namespace and a different default behavior. My understanding (correct me if I'm wrong) is that the historical behavior is that there's only one, and it doesn't actually live anywhere in the filesystem tree. You're adding a special location. I'm I'm talking about associating a default rpc_pipefs instance with a namespace, which it seems to me you're already doing by emulating the legacy behavior. Before you CLONE_NEWNS you get a magic default mount that doesn't exist in the tree. After you CLONE_NEWNS you get something like -EINVAL unless you supply your own default. (I'm actually not sure why new namespaces don't fall back to the magic global one...) I'm suggesting that if the user doesn't specify -o rpcmount then the default could be the first rpc_pipefs mount visible to the current process context, rather than a specific path. Logic to do that exists in the ...
/var/lib/nfs/rpc_pipefs is the default path where the userspace part of the NFS stack
Root namespace is special. In case of nfsroot you need rpc_pipefs before
It breaks isolation. Container should not use host's rpc_pipefs without
static int check_rpc_pipefs(struct vfsmount *mnt, void *arg)
{
struct vfsmount **rpcmount = arg;
struct path path = {
.mnt = mnt,
.dentry = mnt->mnt_root,
};
if (!mnt->mnt_sb)
return 0;
if (mnt->mnt_sb->s_magic != RPCAUTH_GSSMAGIC)
return 0;
if (!path_is_under(&path, &current->fs->root))
return 0;
*rpcmount = mntget(mnt);
return 1;
}
/*
 * get_rpc_pipefs - look up an rpc_pipefs mount
 * @p: path to the rpc_pipefs mount point, or NULL to auto-detect
 *
 * With a non-NULL @p the path is resolved with kern_path() and the mount
 * it lands on is checked by check_rpc_pipefs().
 *
 * With a NULL @p the mounts of the current mount namespace are walked with
 * check_rpc_pipefs() as the callback.  If that finds nothing and the
 * caller's mount namespace is the same as init_task's, init_rpc_pipefs is
 * used instead.
 *
 * Returns the mount obtained via mntget() on success, ERR_PTR(error) if
 * kern_path() failed, or ERR_PTR(-EINVAL) when no suitable mount was found.
 */
struct vfsmount *get_rpc_pipefs(const char *p)
{
	struct vfsmount *rpcmount = ERR_PTR(-EINVAL);
	struct path path;
	int error;

	if (p) {
		/* Explicit location: resolve it and validate that one mount. */
		error = kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
		if (error)
			return ERR_PTR(error);

		check_rpc_pipefs(path.mnt, &rpcmount);
		path_put(&path);

		return rpcmount;
	}

	/* No location given: search the caller's mount namespace. */
	iterate_mounts(check_rpc_pipefs, &rpcmount,
		       current->nsproxy->mnt_ns->root);
	if (!IS_ERR(rpcmount))
		return rpcmount;

	/* Nothing found; only init's mount namespace gets the fallback. */
	if (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns)
		return mntget(init_rpc_pipefs);

	return rpcmount;
}
EXPORT_SYMBOL_GPL(get_rpc_pipefs);
Something like this? Patch to replace patch #10 attached.
--
Kirill A. Shutemov
Any comments? -- Kirill A. Shutemov --
Looks good to me. Thanks. acked-by: Rob Landley <rlandley@parallels.com> Rob --
The kernel doesn't give a damn about the /var/lib/nfs/rpc_pipefs bit. That's all for the benefit of the userland utilities. Trond -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@netapp.com www.netapp.com --
Are you saying that if you go into a container and that mount point doesn't exist, the kernel will still be able to find and use rpc_pipefs? Without userspace creating a specific magic path and mounting a filesystem on it? If so, I misread the patch... Rob --
