The first three patches here fix actual bugs. I think
the last two will reduce the chance for any future bugs
to creep in. RFC for now.--
This is a bug fix for the r/o bind mount patch set.
We need to ensure taking a mnt write on the mnt
referenced by any new struct file.---
lxc-dave/fs/open.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)diff -puN fs/open.c~get-write-in-__dentry_open fs/open.c
--- lxc/fs/open.c~get-write-in-__dentry_open 2007-09-27 11:51:31.000000000 -0700
+++ lxc-dave/fs/open.c 2007-09-27 11:51:31.000000000 -0700
@@ -778,9 +778,15 @@ static struct file *__dentry_open(struct
FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
- error = get_write_access(inode);
+ error = mnt_want_write(mnt);
if (error)
goto cleanup_file;
+
+ error = get_write_access(inode);
+ if (error) {
+ mnt_drop_write(mnt);
+ goto cleanup_file;
+ }
}f->f_mapping = inode->i_mapping;
@@ -820,8 +826,10 @@ static struct file *__dentry_open(structcleanup_all:
fops_put(f->f_op);
- if (f->f_mode & FMODE_WRITE)
+ if (f->f_mode & FMODE_WRITE) {
put_write_access(inode);
+ mnt_drop_write(mnt);
+ }
file_kill(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
_
-
may_open() can fail in a lot of ways. It is also named
such that it doesn't appear to be _taking_ action, just
checking a bunch of conditions.So, it makes a poor place to take and release the mnt
writer count. This moves the burder of taking the
mnt writer counts into the callers. The callers have
the advantage of much more visibility of the filp.
Since we rely on the __fput(filp) to release the mnt
writer count, this is a good thing.---
lxc-dave/fs/namei.c | 41 +++++++++++++++++++++++------------------
lxc-dave/fs/nfsctl.c | 20 ++++++++++++++++++--
2 files changed, 41 insertions(+), 20 deletions(-)diff -puN fs/namei.c~move-mnt_want_write-out-of-may_open fs/namei.c
--- lxc/fs/namei.c~move-mnt_want_write-out-of-may_open 2007-09-27 11:51:32.000000000 -0700
+++ lxc-dave/fs/namei.c 2007-09-27 11:51:32.000000000 -0700
@@ -1621,14 +1621,6 @@ int may_open(struct nameidata *nd, int a
return -EACCES;flag &= ~O_TRUNC;
- } else if (flag & FMODE_WRITE) {
- /*
- * effectively: !special_file()
- * balanced by __fput()
- */
- error = mnt_want_write(nd->mnt);
- if (error)
- return error;
}error = vfs_permission(nd, acc_mode);
@@ -1687,10 +1679,8 @@ static int open_namei_create(struct name
struct dentry *dir = nd->dentry;/*
- * This ensures that the mnt stays writable
- * over the vfs_create() call to may_open(),
- * which takes a more persistent
- * mnt_want_write().
+ * This mnt_want_write() is potentially persistent,
+ * and balanced in __fput()
*/
error = mnt_want_write(nd->mnt);
if (error) {
@@ -1710,14 +1700,18 @@ static int open_namei_create(struct name
/* Don't check for write permission, don't truncate */
error = may_open(nd, 0, flag & ~O_TRUNC);
out:
- /*
- * if needed, may_open() has taken a write
- * on the mnt for us, so we can release ours.
- */
- mnt_drop_write(nd->mnt);
+ if (error)
+ mnt_drop_write(nd->mnt);
return error;
}+static inline int wr...
My end goal here is to make sure all users of may_open()
return filps. This will ensure that we properly release
mount write counts which were taken for the filp in
may_open().This patch moves the sys_open flags to namei flags
calculation into fs/namei.c. We'll shortly be moving
the nameidata_to_filp() calls into namei.c, and this
gets the sys_open flags to a place where we can get
at them when we need them.---
lxc-dave/fs/namei.c | 43 ++++++++++++++++++++++++++++++++++---------
lxc-dave/fs/open.c | 22 ++--------------------
2 files changed, 36 insertions(+), 29 deletions(-)diff -puN fs/namei.c~do-namei_flags-calculation-inside-open_namei fs/namei.c
--- lxc/fs/namei.c~do-namei_flags-calculation-inside-open_namei 2007-09-27 11:51:33.000000000 -0700
+++ lxc-dave/fs/namei.c 2007-09-27 11:51:33.000000000 -0700
@@ -1672,7 +1672,12 @@ int may_open(struct nameidata *nd, int a
return 0;
}-static int open_namei_create(struct nameidata *nd, struct path *path,
+/*
+ * Be careful about ever adding any more callers of this
+ * function. Its flags must be in the namei format, not
+ * what get passed to sys_open().
+ */
+static int __open_namei_create(struct nameidata *nd, struct path *path,
int flag, int mode)
{
int error;
@@ -1713,26 +1718,46 @@ static inline int write_may_occur_to_fil
}/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ * 00 - read-only
+ * 01 - write-only
+ * 10 - read-write
+ * 11 - special
+ * it is changed into
+ * 00 - no permissions needed
+ * 01 - read-permission
+ * 10 - write-permission
+ * 11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc)
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
+ *
+*/
+static inline int sys_open_flags_to_namei_flags(int flag)
+{
+ if ((flag+1) & O_ACCMODE)
+ flag++;
+ return flag;
+}
+
+/*
* open_namei()
*
* namei f...
If open_namei() succeeds, there is potentially a mnt_want_write()
that needs to get balanced. If the caller doesn't create a
'struct file' and eventually __fput() it, or manually drop the
write count on an error, we have a bug.Forcing open_namei() to return a filp fixes this. Any caller
getting a 'struct file' back must consider that filp instantiated
and fput() it normally. The callers no longer have to worry about
ever manually releasing a mnt write count.---
lxc-dave/fs/namei.c | 16 ++++++++--------
lxc-dave/fs/open.c | 6 +-----
lxc-dave/include/linux/fs.h | 2 +-
3 files changed, 10 insertions(+), 14 deletions(-)diff -puN fs/namei.c~make-open_namei-return-a-filp fs/namei.c
--- lxc/fs/namei.c~make-open_namei-return-a-filp 2007-09-27 11:51:34.000000000 -0700
+++ lxc-dave/fs/namei.c 2007-09-27 11:51:34.000000000 -0700
@@ -1750,8 +1750,8 @@ static inline int sys_open_flags_to_name
* system call. See sys_open_flags_to_namei_flags().
* SMP-safe
*/
-int open_namei(int dfd, const char *pathname, int sys_open_flag,
- int mode, struct nameidata *nd)
+struct file *open_namei(int dfd, const char *pathname, int sys_open_flag,
+ int mode, struct nameidata *nd)
{
int acc_mode, error;
struct path path;
@@ -1777,7 +1777,7 @@ int open_namei(int dfd, const char *path
error = path_lookup_open(dfd, pathname, lookup_flags(flag),
nd, flag);
if (error)
- return error;
+ return ERR_PTR(error);
goto ok;
}@@ -1786,7 +1786,7 @@ int open_namei(int dfd, const char *path
*/
error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
if (error)
- return error;
+ return ERR_PTR(error);/*
* We have the parent and last component. First of all, check
@@ -1820,7 +1820,7 @@ do_last:
error = __open_namei_create(nd, &path, flag, mode);
if (error)
goto exit;
- return 0;
+ return nameidata_to_filp(nd, sys_open_flag);
}/*
@@ -1864,7 +1864,7 @@ ok:
mnt_drop_write(...
Error is unused now, and it's also rather silly to allocate the
nd here when it's only used inside open_namei. So I'd suggest
killing do_filp_open, and maybe filp_open aswell while you're at
it.
-
In a moment, we're going to make may_open() stop doing
the mnt_want/drop_write() pair. Doing this first makes
the next patch simpler.---
lxc-dave/fs/namei.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)diff -puN fs/namei.c~move-mnt_want_write-into-open_namei_create fs/namei.c
--- lxc/fs/namei.c~move-mnt_want_write-into-open_namei_create 2007-09-27 11:51:32.000000000 -0700
+++ lxc-dave/fs/namei.c 2007-09-27 11:51:32.000000000 -0700
@@ -1686,6 +1686,19 @@ static int open_namei_create(struct name
int error;
struct dentry *dir = nd->dentry;+ /*
+ * This ensures that the mnt stays writable
+ * over the vfs_create() call to may_open(),
+ * which takes a more persistent
+ * mnt_want_write().
+ */
+ error = mnt_want_write(nd->mnt);
+ if (error) {
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(nd->dentry);
+ return error;
+ }
+
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current->fs->umask;
error = vfs_create(dir->d_inode, path->dentry, mode, nd);
@@ -1693,9 +1706,16 @@ static int open_namei_create(struct name
dput(nd->dentry);
nd->dentry = path->dentry;
if (error)
- return error;
+ goto out;
/* Don't check for write permission, don't truncate */
- return may_open(nd, 0, flag & ~O_TRUNC);
+ error = may_open(nd, 0, flag & ~O_TRUNC);
+out:
+ /*
+ * if needed, may_open() has taken a write
+ * on the mnt for us, so we can release ours.
+ */
+ mnt_drop_write(nd->mnt);
+ return error;
}/*
@@ -1778,11 +1798,7 @@ do_last:/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
- error = mnt_want_write(nd->mnt);
- if (error)
- goto exit_mutex_unlock;
error = open_namei_create(nd, &path, flag, mode);
- mnt_drop_write(nd->mnt);
if (error)
goto exit;
return 0;
_
-
| Tony Lindgren | [PATCH 26/90] ARM: OMAP: abstract debug card setup (smc, leds) |
| Greg Kroah-Hartman | [PATCH 001/196] Chinese: Add the known_regression URI to the HOWTO |
| Vladislav Bolkhovitin | Re: Integration of SCST in the mainstream Linux kernel |
| Jesper Juhl | Re: Dual-Licensing Linux Kernel with GPL V2 and GPL V3 |
git: | |
| Gerrit Renker | [PATCH 27/37] dccp: Integration of dynamic feature activation - part 2 (server side) |
| Jarek Poplawski | [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
| David Miller | [GIT]: Networking |
| Frans Pop | svc: failed to register lockdv1 RPC service (errno 97). |
