Hello, all. This is the second take of sysfs-cleanup patchset. Changes from the last take[L] are... * wrong patch description updated * comment updated * first two patches were accepted into -gregkh and dropped from this series. Thanks. -- tejun [L] http://thread.gmane.org/gmane.linux.kernel/563067 -
With the shadow directories gone, sysfs_rename_dir() can be simplified.
* parent doesn't need to be grabbed separately. Just access
old_dentry->d_parent.
* parent sd can never change. Remove code to move under the new
parent.
* Massage comments a bit.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
fs/sysfs/dir.c | 26 ++++----------------------
1 files changed, 4 insertions(+), 22 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 026ea70..0fe6aa3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -883,14 +883,10 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
struct sysfs_dirent *sd;
struct dentry *parent = NULL;
struct dentry *old_dentry = NULL, *new_dentry = NULL;
- struct sysfs_dirent *parent_sd;
const char *dup_name = NULL;
int error;
- if (!kobj->parent)
- return -EINVAL;
-
- /* get dentries */
+ /* get the original dentry */
sd = kobj->sd;
old_dentry = sysfs_get_dentry(sd);
if (IS_ERR(old_dentry)) {
@@ -898,12 +894,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
goto out_dput;
}
- parent_sd = kobj->parent->sd;
- parent = sysfs_get_dentry(parent_sd);
- if (IS_ERR(parent)) {
- error = PTR_ERR(parent);
- goto out_dput;
- }
+ parent = old_dentry->d_parent;
/* lock parent and get dentry for new name */
mutex_lock(&parent->d_inode->i_mutex);
@@ -933,22 +924,14 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
goto out_drop;
mutex_lock(&sysfs_mutex);
-
dup_name = sd->s_name;
sd->s_name = new_name;
+ mutex_unlock(&sysfs_mutex);
- /* move under the new parent */
+ /* rename */
d_add(new_dentry, NULL);
d_move(sd->s_dentry, new_dentry);
- sysfs_unlink_sibling(sd);
- sysfs_get(parent_sd);
- sysfs_put(sd->s_parent);
- sd->s_parent = parent_sd;
- sysfs_link_sibling(sd);
-
- mutex_unlock(&sysfs_mutex);
-
error = 0;
goto out_unlock;
@@ -958,7 ...* remove space between * and symbol name in variable declaration.
* kill unnecessary new line.
* kill 'found' and test 'sd' instead.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
fs/sysfs/dir.c | 15 +++++----------
1 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 1c3a0dd..026ea70 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -756,24 +756,19 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
struct dentry *ret = NULL;
- struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
- struct sysfs_dirent * sd;
+ struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+ struct sysfs_dirent *sd;
struct bin_attribute *bin_attr;
struct inode *inode;
- int found = 0;
mutex_lock(&sysfs_mutex);
- for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
- if (sysfs_type(sd) &&
- !strcmp(sd->s_name, dentry->d_name.name)) {
- found = 1;
+ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
+ if (sysfs_type(sd) && !strcmp(sd->s_name, dentry->d_name.name))
break;
- }
- }
/* no such entry */
- if (!found)
+ if (!sd)
goto out_unlock;
/* attach dentry and inode */
--
1.5.0.3
-
With the previous sysfs_add_one() update, there is only one user of
the return value of sysfs_addrm_finish() and the user can switch to
testing @sd easily. Make sysfs_addrm_finish() return void for cleaner
semantics as suggested by Satyam Sharma.
This patch doesn't introduce any noticeable behavior change.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Cc: Satyam Sharma <satyam.sharma@gmail.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
fs/sysfs/dir.c | 7 +------
fs/sysfs/inode.c | 7 +++++--
fs/sysfs/sysfs.h | 2 +-
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 91934e1..3e5acc4 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -609,11 +609,8 @@ static void sysfs_drop_dentry(struct sysfs_dirent *sd)
*
* LOCKING:
* All mutexes acquired by sysfs_addrm_start() are released.
- *
- * RETURNS:
- * Number of added/removed sysfs_dirents since sysfs_addrm_start().
*/
-int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
+void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
{
/* release resources acquired by sysfs_addrm_start() */
mutex_unlock(&sysfs_mutex);
@@ -639,8 +636,6 @@ int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
sysfs_deactivate(sd);
sysfs_put(sd);
}
-
- return acxt->cnt;
}
/**
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index f05cda9..e74224e 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -201,7 +201,10 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
if (sd)
sysfs_remove_one(&acxt, sd);
- if (sysfs_addrm_finish(&acxt))
+ sysfs_addrm_finish(&acxt);
+
+ if (sd)
return 0;
- return -ENOENT;
+ else
+ return -ENOENT;
}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index bb3f0c9..0436754 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -66,7 +66,7 @@ extern int sysfs_add_one(struct sysfs_addrm_cxt *acxt,
struct sysfs_dirent *sd);
extern void sysfs_remove_one(struct ...The following patchset applies on top of the last round of sysfs cleanups that Tejun sent out on the 2nd. My target with this patchset is to support sysfs directories with a tag on struct sysfs_dirent making them visible only on selected mounts of sysfs. After going around and around the different possibilities I believe I have finally found something that works and is reasonably maintainable. I believe I have achieved that with only introducing some extra complexity in a few very localized places. The worst part is the code to support multiple superblocks and thus multiple dentry tress for sysfs. I had allocate a linked list in sysfs_move_dir for all of the possible dentries I would need to call d_move on. Bleh. It works, it is correct and it is an atomic rename. Eric sysfs: Move all of inode initialization into sysfs_init_inode sysfs: Remove sysfs_instantiate sysfs: Use kill_anon_super sysfs: Make sysfs_mount static sysfs: In sysfs_lookup don't open code sysfs_find_dirent sysfs: Simplify readdir. sysfs: Rewrite sysfs_drop_dentry. sysfs: Implement __sysfs_get_dentry sysfs: Move sysfs_get_dentry below __sysfs_get_dentry sysfs: Rewrite sysfs_get_dentry in terms of __sysfs_get_dentry sysfs: Remove s_dentry sysfs: Introduce sysfs_rename_mutex sysfs: Simply sysfs_get_dentry sysfs: Don't use lookup_one_len_kern vfs: Remove lookup_one_len_kern sysfs: Support for preventing unmounts. sysfs: Rewrite rename in terms of sysfs dirents sysfs: Rewrite sysfs_move_dir in terms of sysfs dirents sysfs: sysfs_get_dentry add a sb parameter sysfs: Rename Support multiple superblocks sysfs: sysfs_chmod_file handle multiple superblocks sysfs: sysfs_uptdate_file handle multiple superblocks sysfs: Implement sysfs tagged directory support. sysfs: Implement sysfs_delete_link and sysfs_rename_link driver core: Implement tagged directory support for device classes. -
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 37 -------------------------------------
fs/sysfs/inode.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
fs/sysfs/mount.c | 5 -----
3 files changed, 45 insertions(+), 45 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3e5acc4..ae06f4a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -746,24 +746,12 @@ int sysfs_create_dir(struct kobject * kobj)
return error;
}
-static int sysfs_count_nlink(struct sysfs_dirent *sd)
-{
- struct sysfs_dirent *child;
- int nr = 0;
-
- for (child = sd->s_children; child; child = child->s_sibling)
- if (sysfs_type(child) == SYSFS_DIR)
- nr++;
- return nr + 2;
-}
-
static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
struct dentry *ret = NULL;
struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
struct sysfs_dirent *sd;
- struct bin_attribute *bin_attr;
struct inode *inode;
mutex_lock(&sysfs_mutex);
@@ -783,31 +771,6 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
- if (inode->i_state & I_NEW) {
- /* initialize inode according to type */
- switch (sysfs_type(sd)) {
- case SYSFS_DIR:
- inode->i_op = &sysfs_dir_inode_operations;
- inode->i_fop = &sysfs_dir_operations;
- inode->i_nlink = sysfs_count_nlink(sd);
- break;
- case SYSFS_KOBJ_ATTR:
- inode->i_size = PAGE_SIZE;
- inode->i_fop = &sysfs_file_operations;
- break;
- case SYSFS_KOBJ_BIN_ATTR:
- bin_attr = sd->s_elem.bin_attr.bin_attr;
- inode->i_size = bin_attr->size;
- inode->i_fop = &bin_fops;
- break;
- case SYSFS_KOBJ_LINK:
- inode->i_op = &sysfs_symlink_inode_operations;
- break;
- default:
- BUG();
- }
- }
-
sysfs_instantiate(dentry, inode);
sysfs_attach_dentry(sd, dentry);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e74224e..aefc346 100644
--- ...Now that sysfs_get_inode is dropping the inode lock
we no longer have a need from sysfs_instantiate.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 2 +-
fs/sysfs/inode.c | 17 -----------------
fs/sysfs/sysfs.h | 1 -
3 files changed, 1 insertions(+), 19 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index ae06f4a..55d8299 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -771,7 +771,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
- sysfs_instantiate(dentry, inode);
+ d_instantiate(dentry, inode);
sysfs_attach_dentry(sd, dentry);
out_unlock:
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index aefc346..70a2420 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -212,23 +212,6 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
return inode;
}
-/**
- * sysfs_instantiate - instantiate dentry
- * @dentry: dentry to be instantiated
- * @inode: inode associated with @sd
- *
- * Unlock @inode if locked and instantiate @dentry with @inode.
- *
- * LOCKING:
- * None.
- */
-void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
-{
- BUG_ON(!dentry || dentry->d_inode);
-
- d_instantiate(dentry, inode);
-}
-
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
{
struct sysfs_addrm_cxt acxt;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0436754..8a0aea1 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -69,7 +69,6 @@ extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
extern void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
-extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
extern void release_sysfs_dirent(struct sysfs_dirent * sd);
extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
--
1.5.1.1.181.g2de0
-
Since sysfs no longer stores fs directory information in the dcache
on a permanent basis kill_litter_super it is inappropriate and actively
wrong. It will decrement the count on all dentries left in the
dcache before trying to free them.
At the moment this is not biting us only because we never unmount sysfs.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/mount.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 919eaaf..d1094c6 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -72,7 +72,7 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
static struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.get_sb = sysfs_get_sb,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
};
int __init sysfs_init(void)
--
1.5.1.1.181.g2de0
-
This patch modifies the users of sysfs_mount to use sysfs_root
instead (which is what they are looking for). It then
makes sysfs_mount static to keep people from using it
by accident.
The net result is slightly faster and cleaner code.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 4 +---
fs/sysfs/mount.c | 2 +-
fs/sysfs/symlink.c | 7 +++----
fs/sysfs/sysfs.h | 1 -
4 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 55d8299..39df3ce 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -735,10 +735,8 @@ int sysfs_create_dir(struct kobject * kobj)
if (kobj->parent)
parent_sd = kobj->parent->sd;
- else if (sysfs_mount && sysfs_mount->mnt_sb)
- parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
else
- return -EFAULT;
+ parent_sd = &sysfs_root;
error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
if (!error)
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index d1094c6..9fae7d5 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -15,7 +15,7 @@
/* Random magic number */
#define SYSFS_MAGIC 0x62656572
-struct vfsmount *sysfs_mount;
+static struct vfsmount *sysfs_mount;
struct super_block * sysfs_sb = NULL;
struct kmem_cache *sysfs_dir_cachep;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index f77ad61..46f8fd4 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -60,10 +60,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
BUG_ON(!name);
- if (!kobj) {
- if (sysfs_mount && sysfs_mount->mnt_sb)
- parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
- } else
+ if (!kobj)
+ parent_sd = &sysfs_root;
+ else
parent_sd = kobj->sd;
error = -EFAULT;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 8a0aea1..77253aa 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -51,7 +51,6 @@ struct sysfs_addrm_cxt {
int cnt;
};
-extern struct ...This is a small cleanup patch that makes the code just a little bit cleaner. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> --- fs/sysfs/dir.c | 4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 39df3ce..2721e36 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -754,9 +754,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, mutex_lock(&sysfs_mutex); - for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) - if (sysfs_type(sd) && !strcmp(sd->s_name, dentry->d_name.name)) - break; + sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); /* no such entry */ if (!sd) -- 1.5.1.1.181.g2de0 -
At some point someone wrote sysfs_readdir to insert a cursor
into the list of sysfs_dirents to ensure that sysfs_readdir would
restart properly. That works but it is complex code and tends
to be expensive.
The same effect can be achieved by keeping the sysfs_dirents in
inode order and using the inode number as the f_pos. Then
when we restart we just have to find the first dirent whose inode
number is equal or greater then the last sysfs_dirent we attempted
to return.
Removing the sysfs directory cursor also allows the remove of
all of the mysterious checks for sysfs_type(sd) != 0. Which
were nonbovious checks to see if a cursor was in a directory list.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 175 ++++++++++++++------------------------------------------
1 files changed, 44 insertions(+), 131 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2721e36..ef99883 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -33,10 +33,20 @@ static DEFINE_IDA(sysfs_ino_ida);
static void sysfs_link_sibling(struct sysfs_dirent *sd)
{
struct sysfs_dirent *parent_sd = sd->s_parent;
+ struct sysfs_dirent **pos;
BUG_ON(sd->s_sibling);
- sd->s_sibling = parent_sd->s_children;
- parent_sd->s_children = sd;
+
+ /* Store directory entries in order by ino. This allows
+ * readdir to properly restart without having to add a
+ * cursor into the s_children list.
+ */
+ for (pos = &parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
+ if (sd->s_ino < (*pos)->s_ino)
+ break;
+ }
+ sd->s_sibling = *pos;
+ *pos = sd;
}
/**
@@ -657,7 +667,7 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
struct sysfs_dirent *sd;
for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
- if (sysfs_type(sd) && !strcmp(sd->s_name, name))
+ if (!strcmp(sd->s_name, name))
return sd;
return NULL;
}
@@ -809,7 +819,7 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
while ...Currently we find the dentry to drop by looking at sd->s_dentry.
We can just as easily accomplish the same task by looking up the
sysfs inode and finding all of the dentries from there, with the
added bonus that we don't need to play with the sysfs_assoc_lock.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 53 ++++++++++++++++++++++++++---------------------------
1 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index ef99883..6ca4382 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -563,50 +563,49 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
* Drop dentry for @sd. @sd must have been unlinked from its
* parent on entry to this function such that it can't be looked
* up anymore.
- *
- * @sd->s_dentry which is protected with sysfs_assoc_lock points
- * to the currently associated dentry but we're not holding a
- * reference to it and racing with dput(). Grab dcache_lock and
- * verify dentry before dropping it. If @sd->s_dentry is NULL or
- * dput() beats us, no need to bother.
*/
static void sysfs_drop_dentry(struct sysfs_dirent *sd)
{
- struct dentry *dentry = NULL;
struct inode *inode;
+ struct dentry *dentry;
+
+ inode = ilookup(sysfs_sb, sd->s_ino);
+ if (!inode)
+ return;
- /* We're not holding a reference to ->s_dentry dentry but the
- * field will stay valid as long as sysfs_assoc_lock is held.
+ /* Drop any existing dentries associated with sd.
+ *
+ * For the dentry to be properly freed we need to grab a
+ * reference to the dentry under the dcache lock, unhash it,
+ * and then put it. The playing with the dentry count allows
+ * dput to immediately free the dentry if it is not in use.
*/
- spin_lock(&sysfs_assoc_lock);
+repeat:
spin_lock(&dcache_lock);
-
- /* drop dentry if it's there and dput() didn't kill it yet */
- if (sd->s_dentry && sd->s_dentry->d_inode) {
- dentry = ...This function is similar but much simpler to sysfs_get_dentry
returns a sysfs dentry if one currently exists.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 38 ++++++++++++++++++++++++++++++++++++++
1 files changed, 38 insertions(+), 0 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 6ca4382..2caa5e0 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -848,6 +848,44 @@ void sysfs_remove_dir(struct kobject * kobj)
__sysfs_remove_dir(sd);
}
+/**
+ * __sysfs_get_dentry - get dentry for the given sysfs_dirent
+ * @sb: superblock of the dentry to return
+ * @sd: sysfs_dirent of interest
+ *
+ * Get dentry for @sd. Only return a dentry if one currently
+ * exists.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep)
+ *
+ * RETURNS:
+ * Pointer to found dentry on success, NULL on failure.
+ */
+static struct dentry *__sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd)
+{
+ struct inode *inode;
+ struct dentry *dentry = NULL;
+
+ inode = ilookup5_nowait(sysfs_sb, sd->s_ino, sysfs_ilookup_test, sd);
+ if (inode && !(inode->i_state & I_NEW)) {
+ struct dentry *alias;
+ spin_lock(&dcache_lock);
+ list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ if (!IS_ROOT(alias) && d_unhashed(alias))
+ continue;
+ if (alias->d_sb != sb)
+ continue;
+ dentry = alias;
+ dget_locked(dentry);
+ break;
+ }
+ spin_unlock(&dcache_lock);
+ }
+ iput(inode);
+ return dentry;
+}
+
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
struct sysfs_dirent *sd;
--
1.5.1.1.181.g2de0
-
sysfs_get_dentry is higher in fs/sysfs/dir.c then is needed and it the
dependencies get simpler if we move it down in the file to where I
have placed __sysfs_get_dentry. So this patch just moves
sysfs_get_dentry so code movement doesn't get confused with later code
changes.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 196 ++++++++++++++++++++++++++++----------------------------
1 files changed, 98 insertions(+), 98 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2caa5e0..59a9ce8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -73,104 +73,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
}
/**
- * sysfs_get_dentry - get dentry for the given sysfs_dirent
- * @sd: sysfs_dirent of interest
- *
- * Get dentry for @sd. Dentry is looked up if currently not
- * present. This function climbs sysfs_dirent tree till it
- * reaches a sysfs_dirent with valid dentry attached and descends
- * down from there looking up dentry for each step.
- *
- * LOCKING:
- * Kernel thread context (may sleep)
- *
- * RETURNS:
- * Pointer to found dentry on success, ERR_PTR() value on error.
- */
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
-{
- struct sysfs_dirent *cur;
- struct dentry *parent_dentry, *dentry;
- int i, depth;
-
- /* Find the first parent which has valid s_dentry and get the
- * dentry.
- */
- mutex_lock(&sysfs_mutex);
- restart0:
- spin_lock(&sysfs_assoc_lock);
- restart1:
- spin_lock(&dcache_lock);
-
- dentry = NULL;
- depth = 0;
- cur = sd;
- while (!cur->s_dentry || !cur->s_dentry->d_inode) {
- if (cur->s_flags & SYSFS_FLAG_REMOVED) {
- dentry = ERR_PTR(-ENOENT);
- depth = 0;
- break;
- }
- cur = cur->s_parent;
- depth++;
- }
- if (!IS_ERR(dentry))
- dentry = dget_locked(cur->s_dentry);
-
- spin_unlock(&dcache_lock);
- spin_unlock(&sysfs_assoc_lock);
-
- /* from the found dentry, look up depth times */
- while (depth--) {
- /* find and get ...This removes the last major user of s_dentry and makes
the locking in sysfs_get_dentry much simpler. Hopefully
leading to more readable and maintainable code.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 23 ++++++-----------------
1 files changed, 6 insertions(+), 17 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 59a9ce8..adb1b01 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -809,19 +809,15 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
struct dentry *parent_dentry, *dentry;
int i, depth;
- /* Find the first parent which has valid s_dentry and get the
- * dentry.
+ /* Find the first parent which has valid dentry.
*/
mutex_lock(&sysfs_mutex);
- restart0:
- spin_lock(&sysfs_assoc_lock);
- restart1:
- spin_lock(&dcache_lock);
+ restart:
dentry = NULL;
depth = 0;
cur = sd;
- while (!cur->s_dentry || !cur->s_dentry->d_inode) {
+ while (!(dentry = __sysfs_get_dentry(sysfs_sb, cur))) {
if (cur->s_flags & SYSFS_FLAG_REMOVED) {
dentry = ERR_PTR(-ENOENT);
depth = 0;
@@ -830,11 +826,6 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
cur = cur->s_parent;
depth++;
}
- if (!IS_ERR(dentry))
- dentry = dget_locked(cur->s_dentry);
-
- spin_unlock(&dcache_lock);
- spin_unlock(&sysfs_assoc_lock);
/* from the found dentry, look up depth times */
while (depth--) {
@@ -847,7 +838,7 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
*/
if (i != depth) {
dput(dentry);
- goto restart0;
+ goto restart;
}
sysfs_get(cur);
@@ -866,18 +857,16 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
}
mutex_lock(&sysfs_mutex);
- spin_lock(&sysfs_assoc_lock);
/* This, again, can happen if tree structure has
* changed and we looked up the wrong thing. Restart.
*/
- if (cur->s_dentry != dentry) {
+ if (dentry->d_fsdata != cur) {
dput(dentry);
sysfs_put(cur);
- goto ...The only uses of s_dentry left are the code that maintains
s_dentry and trivial users that don't actually need it.
So this patch removes the s_dentry maintenance code and
restructures the trivial uses to use something else.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 32 ++++----------------------------
fs/sysfs/mount.c | 1 -
fs/sysfs/sysfs.h | 1 -
3 files changed, 4 insertions(+), 30 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index adb1b01..1078e60 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -247,22 +247,7 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
{
struct sysfs_dirent * sd = dentry->d_fsdata;
- if (sd) {
- /* sd->s_dentry is protected with sysfs_assoc_lock.
- * This allows sysfs_drop_dentry() to dereference it.
- */
- spin_lock(&sysfs_assoc_lock);
-
- /* The dentry might have been deleted or another
- * lookup could have happened updating sd->s_dentry to
- * point the new dentry. Ignore if it isn't pointing
- * to this dentry.
- */
- if (sd->s_dentry == dentry)
- sd->s_dentry = NULL;
- spin_unlock(&sysfs_assoc_lock);
- sysfs_put(sd);
- }
+ sysfs_put(sd);
iput(inode);
}
@@ -310,9 +295,6 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
* @sd: target sysfs_dirent
* @dentry: dentry to associate
*
- * Associate @sd with @dentry. This is protected by
- * sysfs_assoc_lock to avoid race with sysfs_d_iput().
- *
* LOCKING:
* mutex_lock(sysfs_mutex)
*/
@@ -320,12 +302,6 @@ static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
{
dentry->d_op = &sysfs_dentry_ops;
dentry->d_fsdata = sysfs_get(sd);
-
- /* protect sd->s_dentry against sysfs_d_iput */
- spin_lock(&sysfs_assoc_lock);
- sd->s_dentry = dentry;
- spin_unlock(&sysfs_assoc_lock);
-
d_rehash(dentry);
}
@@ -927,7 +903,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char ...Looking carefully at the rename code we have a subtle dependency that the structure of sysfs not change while we are performing a rename. If the parent directory of the object we are renaming changes while the rename is being performed nasty things could happen when we go to release our locks. So introduce a sysfs_rename_mutex to prevent this highly unlikely theoretical issue. In addition hold sysfs_rename_mutex over all calls to sysfs_get_dentry. Allowing sysfs_get_dentry to be simplified in the future. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> --- fs/sysfs/dir.c | 8 +++++++- fs/sysfs/file.c | 4 ++++ fs/sysfs/sysfs.h | 1 + 3 files changed, 12 insertions(+), 1 deletions(-) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 1078e60..e0f49b7 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -15,6 +15,7 @@ #include "sysfs.h" DEFINE_MUTEX(sysfs_mutex); +DEFINE_MUTEX(sysfs_rename_mutex); spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; @@ -774,7 +775,7 @@ static struct dentry *__sysfs_get_dentry(struct super_block *sb, struct sysfs_di * down from there looking up dentry for each step. * * LOCKING: - * Kernel thread context (may sleep) + * mutex_lock(sysfs_rename_mutex) * * RETURNS: * Pointer to found dentry on success, ERR_PTR() value on error. @@ -859,6 +860,8 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) const char *dup_name = NULL; int error; + mutex_lock(&sysfs_rename_mutex); + /* get the original dentry */ sd = kobj->sd; old_dentry = sysfs_get_dentry(sd); @@ -916,6 +919,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) kfree(dup_name); dput(old_dentry); dput(new_dentry); + mutex_unlock(&sysfs_rename_mutex); return error; } @@ -927,6 +931,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) struct dentry *old_dentry = NULL, *new_dentry = NULL; ...
Now that we know the sysfs tree structure cannot change under us
simplfy sysfs_get_dentry.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 48 ++++++------------------------------------------
1 files changed, 6 insertions(+), 42 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e0f49b7..a9bdb12 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -784,71 +784,35 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
{
struct sysfs_dirent *cur;
struct dentry *parent_dentry, *dentry;
- int i, depth;
/* Find the first parent which has valid dentry.
*/
- mutex_lock(&sysfs_mutex);
- restart:
-
dentry = NULL;
- depth = 0;
cur = sd;
while (!(dentry = __sysfs_get_dentry(sysfs_sb, cur))) {
if (cur->s_flags & SYSFS_FLAG_REMOVED) {
dentry = ERR_PTR(-ENOENT);
- depth = 0;
break;
}
cur = cur->s_parent;
- depth++;
}
/* from the found dentry, look up depth times */
- while (depth--) {
- /* find and get depth'th ancestor */
- for (cur = sd, i = 0; cur && i < depth; i++)
+ while (dentry->d_fsdata != sd) {
+ /* Find the first ancestor I have not looked up */
+ cur = sd;
+ while (cur->s_parent != dentry->d_fsdata)
cur = cur->s_parent;
- /* This can happen if tree structure was modified due
- * to move/rename. Restart.
- */
- if (i != depth) {
- dput(dentry);
- goto restart;
- }
-
- sysfs_get(cur);
-
- mutex_unlock(&sysfs_mutex);
-
/* look it up */
parent_dentry = dentry;
dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
strlen(cur->s_name));
dput(parent_dentry);
- if (IS_ERR(dentry)) {
- sysfs_put(cur);
- return dentry;
- }
-
- mutex_lock(&sysfs_mutex);
-
- /* This, again, can happen if tree structure has
- * changed and we looked up the wrong thing. Restart.
- */
- if (dentry->d_fsdata != cur) {
- dput(dentry);
- sysfs_put(cur);
- goto ...Upon inspection it appears that there is no looking of the
inode mutex in lookup_one_len_kern and we aren't calling
it with the inode mutex and that is wrong.
So this patch rolls our own dcache insertion function that
does exactly what we need it to do. As it turns out this
is pretty trivial to do and it makes the code easier to
audit.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 41 +++++++++++++++++++++++++++++++++++++++--
1 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index a9bdb12..1d53c2a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -765,6 +765,44 @@ static struct dentry *__sysfs_get_dentry(struct super_block *sb, struct sysfs_di
return dentry;
}
+static struct dentry *sysfs_add_dentry(struct dentry *parent, struct sysfs_dirent *sd)
+{
+ struct qstr name;
+ struct dentry *dentry;
+ struct inode *inode;
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&sysfs_mutex);
+ dentry = ERR_PTR(-EINVAL);
+ if (parent->d_fsdata != sd->s_parent)
+ goto out;
+
+ name.name = sd->s_name;
+ name.len = strlen(sd->s_name);
+ dentry = d_hash_and_lookup(parent, &name);
+ if (dentry)
+ goto out;
+
+ dentry = d_alloc(parent, &name);
+ if (!dentry) {
+ dentry = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ inode = sysfs_get_inode(sd);
+ if (!inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ d_instantiate(dentry, inode);
+ sysfs_attach_dentry(sd, dentry);
+out:
+ mutex_unlock(&sysfs_mutex);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ return dentry;
+}
+
/**
* sysfs_get_dentry - get dentry for the given sysfs_dirent
* @sd: sysfs_dirent of interest
@@ -806,8 +844,7 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
/* look it up */
parent_dentry = dentry;
- dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
- strlen(cur->s_name));
+ dentry = sysfs_add_dentry(parent_dentry, cur);
...Now that sysfs no longer uses lookup_one_len_kern the function has
no users so remove it from the kernel. Making namei.c just a little
easier to read.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/namei.c | 46 +++++++++++-----------------------------------
include/linux/namei.h | 1 -
2 files changed, 11 insertions(+), 36 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index a83160a..69d3304 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1273,7 +1273,12 @@ int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
return err;
}
-static inline struct dentry *__lookup_hash_kern(struct qstr *name, struct dentry *base, struct nameidata *nd)
+/*
+ * Restricted form of lookup. Doesn't follow links, single-component only,
+ * needs parent already locked. Doesn't follow mounts.
+ * SMP-safe.
+ */
+static inline struct dentry * __lookup_hash(struct qstr *name, struct dentry *base, struct nameidata *nd)
{
struct dentry *dentry;
struct inode *inode;
@@ -1281,6 +1286,11 @@ static inline struct dentry *__lookup_hash_kern(struct qstr *name, struct dentry
inode = base->d_inode;
+ err = permission(inode, MAY_EXEC, nd);
+ dentry = ERR_PTR(err);
+ if (err)
+ goto out;
+
/*
* See if the low-level filesystem might want
* to use its own hash..
@@ -1308,29 +1318,6 @@ out:
return dentry;
}
-/*
- * Restricted form of lookup. Doesn't follow links, single-component only,
- * needs parent already locked. Doesn't follow mounts.
- * SMP-safe.
- */
-static inline struct dentry * __lookup_hash(struct qstr *name, struct dentry *base, struct nameidata *nd)
-{
- struct dentry *dentry;
- struct inode *inode;
- int err;
-
- inode = base->d_inode;
-
- err = permission(inode, MAY_EXEC, nd);
- dentry = ERR_PTR(err);
- if (err)
- goto out;
-
- dentry = __lookup_hash_kern(name, base, nd);
-out:
- return dentry;
-}
-
static struct dentry *lookup_hash(struct nameidata *nd)
{
return ...To support mounting multiple instances of sysfs occassionally I
need to walk through all of the currently present sysfs super blocks.
To allow this iteration this patch adds sysfs_grab_supers
and sysfs_release_supers. While a piece of code is in
a section surrounded by these no more sysfs super blocks
will be either created or destroyed.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/mount.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++-----
fs/sysfs/sysfs.h | 10 +++++++
2 files changed, 81 insertions(+), 8 deletions(-)
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 4968d31..b2bfa45 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -33,47 +33,110 @@ struct sysfs_dirent sysfs_root = {
static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
{
- struct inode *inode;
- struct dentry *root;
+ struct sysfs_super_info *info = NULL;
+ struct inode *inode = NULL;
+ struct dentry *root = NULL;
+ int error;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = SYSFS_MAGIC;
sb->s_op = &sysfs_ops;
sb->s_time_gran = 1;
- sysfs_sb = sb;
+ if (!sysfs_sb)
+ sysfs_sb = sb;
+
+ error = -ENOMEM;
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ goto out_err;
/* get root inode, initialize and unlock it */
+ error = -ENOMEM;
inode = sysfs_get_inode(&sysfs_root);
if (!inode) {
pr_debug("sysfs: could not get root inode\n");
- return -ENOMEM;
+ goto out_err;
}
/* instantiate and link root dentry */
+ error = -ENOMEM;
root = d_alloc_root(inode);
if (!root) {
pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
- iput(inode);
- return -ENOMEM;
+ goto out_err;
}
root->d_fsdata = &sysfs_root;
sb->s_root = root;
+ sb->s_fs_info = info;
return 0;
+
+out_err:
+ dput(root);
+ iput(inode);
+ kfree(info);
+ if (sysfs_sb == sb)
+ sysfs_sb = NULL;
+ return error;
}
static int ...This patch rewrites sysfs_rename_dir to perform it's checks
as much as possible on the underlying sysfs_dirents instead
of the contents of the dcache. It turns out that this version
is a little simpler, and a little more like the rest of
the sysfs directory modification code.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 36 ++++++++++++++++--------------------
1 files changed, 16 insertions(+), 20 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 1d53c2a..3228f5a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -855,7 +855,7 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
- struct sysfs_dirent *sd;
+ struct sysfs_dirent *sd = kobj->sd;
struct dentry *parent = NULL;
struct dentry *old_dentry = NULL, *new_dentry = NULL;
const char *dup_name = NULL;
@@ -863,42 +863,41 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
mutex_lock(&sysfs_rename_mutex);
+ error = 0;
+ if (strcmp(sd->s_name, new_name) == 0)
+ goto out; /* nothing to rename */
+
/* get the original dentry */
- sd = kobj->sd;
old_dentry = sysfs_get_dentry(sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
- goto out_dput;
+ goto out;
}
parent = old_dentry->d_parent;
/* lock parent and get dentry for new name */
mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&sysfs_mutex);
- new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
- if (IS_ERR(new_dentry)) {
- error = PTR_ERR(new_dentry);
- goto out_unlock;
- }
-
- error = -EINVAL;
- if (old_dentry == new_dentry)
+ error = -EEXIST;
+ if (sysfs_find_dirent(sd->s_parent, new_name))
goto out_unlock;
- error = -EEXIST;
- if (new_dentry->d_inode)
+ error = -ENOMEM;
+ new_dentry = d_alloc_name(parent, new_name);
+ if (!new_dentry)
goto out_unlock;
/* rename kobject and sysfs_dirent */
error = -ENOMEM;
...This patch rewrites sysfs_move_dir to perform it's checks
as much as possible on the underlying sysfs_dirents instead
of the contents of the dcache, making sysfs_move_dir
more like the rest of the sysfs directory modification
code.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 34 ++++++++++++++++++----------------
1 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3228f5a..94d705a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -932,56 +932,58 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
BUG_ON(!sd->s_parent);
new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
+ error = 0;
+ if (sd->s_parent == new_parent_sd)
+ goto out; /* nothing to move */
+
/* get dentries */
old_dentry = sysfs_get_dentry(sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
- goto out_dput;
+ goto out;
}
old_parent = old_dentry->d_parent;
new_parent = sysfs_get_dentry(new_parent_sd);
if (IS_ERR(new_parent)) {
error = PTR_ERR(new_parent);
- goto out_dput;
+ goto out;
}
- if (old_parent->d_inode == new_parent->d_inode) {
- error = 0;
- goto out_dput; /* nothing to move */
- }
again:
mutex_lock(&old_parent->d_inode->i_mutex);
if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
mutex_unlock(&old_parent->d_inode->i_mutex);
goto again;
}
+ mutex_lock(&sysfs_mutex);
- new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
- if (IS_ERR(new_dentry)) {
- error = PTR_ERR(new_dentry);
+ error = -EEXIST;
+ if (sysfs_find_dirent(new_parent_sd, sd->s_name))
goto out_unlock;
- } else
- error = 0;
+
+ error = -ENOMEM;
+ new_dentry = d_alloc_name(new_parent, sd->s_name);
+ if (!new_dentry)
+ goto out_unlock;
+
+ error = 0;
d_add(new_dentry, NULL);
d_move(old_dentry, new_dentry);
dput(new_dentry);
/* Remove from old parent's list and insert into ...In preparation for multiple mounts of sysfs add a superblock parameter to
sysfs_get_dentry.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 12 +++++++-----
fs/sysfs/file.c | 4 ++--
fs/sysfs/sysfs.h | 2 +-
3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 94d705a..ac45523 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -805,6 +805,7 @@ out:
/**
* sysfs_get_dentry - get dentry for the given sysfs_dirent
+ * @sb: superblock of the dentry to return
* @sd: sysfs_dirent of interest
*
* Get dentry for @sd. Dentry is looked up if currently not
@@ -817,8 +818,9 @@ out:
*
* RETURNS:
* Pointer to found dentry on success, ERR_PTR() value on error.
+ * NULL if the sysfs dentry does not appear in the specified superblock
*/
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd)
{
struct sysfs_dirent *cur;
struct dentry *parent_dentry, *dentry;
@@ -827,7 +829,7 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
*/
dentry = NULL;
cur = sd;
- while (!(dentry = __sysfs_get_dentry(sysfs_sb, cur))) {
+ while (!(dentry = __sysfs_get_dentry(sb, cur))) {
if (cur->s_flags & SYSFS_FLAG_REMOVED) {
dentry = ERR_PTR(-ENOENT);
break;
@@ -868,7 +870,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
goto out; /* nothing to rename */
/* get the original dentry */
- old_dentry = sysfs_get_dentry(sd);
+ old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
goto out;
@@ -937,14 +939,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
goto out; /* nothing to move */
/* get dentries */
- old_dentry = sysfs_get_dentry(sd);
+ old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
goto ...This patch modifies the sysfs_rename_dir and sysfs_move_dir
to support multiple sysfs dentry trees rooted in different
sysfs superblocks.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/dir.c | 193 +++++++++++++++++++++++++++++++++++++++-----------------
1 files changed, 136 insertions(+), 57 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index ac45523..34eabf4 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -855,42 +855,113 @@ struct dentry *sysfs_get_dentry(struct super_block *sb, struct sysfs_dirent *sd)
return dentry;
}
+struct sysfs_rename_struct {
+ struct list_head list;
+ struct dentry *old_dentry;
+ struct dentry *new_dentry;
+ struct dentry *old_parent;
+ struct dentry *new_parent;
+};
+
+static void post_rename(struct list_head *head)
+{
+ struct sysfs_rename_struct *srs;
+ while (!list_empty(head)) {
+ srs = list_entry(head->next, struct sysfs_rename_struct, list);
+ dput(srs->old_dentry);
+ dput(srs->new_dentry);
+ dput(srs->old_parent);
+ dput(srs->new_parent);
+ list_del(&srs->list);
+ kfree(srs);
+ }
+}
+
+static int prep_rename(struct list_head *head,
+ struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
+ const char *name)
+{
+ struct sysfs_rename_struct *srs;
+ struct super_block *sb;
+ struct dentry *dentry;
+ int error;
+
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ dentry = sysfs_get_dentry(sb, sd);
+ if (!dentry)
+ continue;
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto err_out;
+ }
+
+ srs = kzalloc(sizeof(*srs), GFP_KERNEL);
+ if (!srs) {
+ dput(dentry);
+ goto err_out;
+ }
+
+ INIT_LIST_HEAD(&srs->list);
+ list_add(head, &srs->list);
+ srs->old_dentry = dentry;
+ srs->old_parent = dget(dentry->d_parent);
+
+ dentry = sysfs_get_dentry(sb, new_parent_sd);
+ if (IS_ERR(dentry)) {
+ error = PTR_ERR(dentry);
+ goto err_out;
+ }
+ srs->new_parent = dentry;
+
+ error = -ENOMEM;
+ dentry ...Teach sysfs_chmod_file how to handle multiple sysfs
superblocks. We need to iterate over each superblock
so that we give all of the appropriate filesystem modification
notifications.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/file.c | 41 +++++++++++++++++++++++++----------------
1 files changed, 25 insertions(+), 16 deletions(-)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f954b9f..cff054f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -501,7 +501,8 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
{
struct sysfs_dirent *victim_sd = NULL;
- struct dentry *victim = NULL;
+ struct super_block *sb;
+ struct dentry *victim;
struct inode * inode;
struct iattr newattrs;
int rc;
@@ -512,22 +513,30 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
goto out;
mutex_lock(&sysfs_rename_mutex);
- victim = sysfs_get_dentry(sysfs_sb, victim_sd);
- mutex_unlock(&sysfs_rename_mutex);
- if (IS_ERR(victim)) {
- rc = PTR_ERR(victim);
- victim = NULL;
- goto out;
+ sysfs_grab_supers();
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ victim = sysfs_get_dentry(sb, victim_sd);
+ if (!victim)
+ continue;
+ if (IS_ERR(victim)) {
+ rc = PTR_ERR(victim);
+ victim = NULL;
+ goto out_unlock;
+ }
+
+ inode = victim->d_inode;
+ mutex_lock(&inode->i_mutex);
+ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+ rc = notify_change(victim, &newattrs);
+ mutex_unlock(&inode->i_mutex);
+
+ dput(victim);
}
-
- inode = victim->d_inode;
- mutex_lock(&inode->i_mutex);
- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- rc = notify_change(victim, &newattrs);
- mutex_unlock(&inode->i_mutex);
- ...Teach sysfs_update_file how to handle multiple sysfs
superblocks. Again we are just iterating over the superblocks
to so all of the filesystem modification notifications work
as expected.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/file.c | 35 +++++++++++++++++++++--------------
1 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index cff054f..1e6f9df 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -462,7 +462,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
{
struct sysfs_dirent *victim_sd = NULL;
- struct dentry *victim = NULL;
+ struct super_block *sb;
+ struct dentry *victim;
int rc;
rc = -ENOENT;
@@ -471,21 +472,27 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
goto out;
mutex_lock(&sysfs_rename_mutex);
- victim = sysfs_get_dentry(sysfs_sb, victim_sd);
- mutex_unlock(&sysfs_rename_mutex);
- if (IS_ERR(victim)) {
- rc = PTR_ERR(victim);
- victim = NULL;
- goto out;
+ sysfs_grab_supers();
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ victim = sysfs_get_dentry(sb, victim_sd);
+ if (!victim)
+ continue;
+ if (IS_ERR(victim)) {
+ rc = PTR_ERR(victim);
+ victim = NULL;
+ goto out_unlock;
+ }
+ mutex_lock(&victim->d_inode->i_mutex);
+ victim->d_inode->i_mtime = CURRENT_TIME;
+ fsnotify_modify(victim);
+ mutex_unlock(&victim->d_inode->i_mutex);
+ rc = 0;
+ dput(victim);
}
-
- mutex_lock(&victim->d_inode->i_mutex);
- victim->d_inode->i_mtime = CURRENT_TIME;
- fsnotify_modify(victim);
- mutex_unlock(&victim->d_inode->i_mutex);
- rc = 0;
+ out_unlock:
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
out:
- dput(victim);
sysfs_put(victim_sd);
return rc;
}
--
1.5.1.1.181.g2de0
-
The problem. When implementing a network namespace I need to be able to have multiple network devices with the same name. Currently this is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and potentially a few other directories of the form /sys/ ... /net/*. What this patch does is to add an additional tag field to the sysfs dirent structure. For directories that should show different contents depending on the context such as /sys/class/net/, and /sys/devices/virtual/net/ this tag field is used to specify the context in which those directories should be visible. Effectively this is the same as creating multiple distinct directories with the same name the internally to sysfs the result is nicer. I am calling the concept of a single directory that looks like multiple directories all at the same path in the filesystem tagged directories. For the networking namespace the set of directories whose contents I need to filter with tags can depend on the presence or absence of hutplug hardware or which modules are currently loaded. Which means I need a simple race free way to setup those directories as taged. To achieve a race free design all tagged directories are created and managed by sysfs itself. The upper level code that knows what tagged directories we need provides just two methods that enable this: sb_tag() - that returns a "void *" tag that identifies the context of the process that mounted sysfs. kobject_tag(kobj) - that returns a "void *" tag that identifies the context a kobject should be in. Everything else is left up to sysfs. For the network namespace sb_tag and kobject_tag are essentially one line functions, and look to remain that. The work needed in sysfs is more extensive. At each directory or symlink creating I need to check if the directory it is being created in is a tagged directory and if so generate the appropriate tag to place on the sysfs_dirent. Likewise at each symlink or directory removal I need to check if the sysfs ...
When removing a symlink sysfs_remove_link does not provide
enough information to figure out which tagged directory the symlink
falls in. So I need sysfs_delete_link which is passed the target
of the symlink to delete.
Further half the time when we are removing a symlink the code is
actually renaming the symlink but not doing so explicitly because
we don't have a symlink rename method. So I have added sysfs_rename_link
as well.
Both of these functions now have enough information to find a symlink
in a tagged directory. The only restriction is that they must be called
before the target kobject is renamed or deleted. If they are called
later I loose track of which tag the target kobject was marked with
and can no longer find the old symlink to remove it.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
fs/sysfs/symlink.c | 31 +++++++++++++++++++++++++++++++
include/linux/sysfs.h | 18 ++++++++++++++++++
2 files changed, 49 insertions(+), 0 deletions(-)
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 99aaf6f..6476b8f 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -106,6 +106,21 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
/**
+ * sysfs_delete_link - remove symlink in object's directory.
+ * @kobj: object we're acting for.
+ * @targ: object we're pointing to.
+ * @name: name of the symlink to remove.
+ *
+ * Unlike sysfs_remove_link sysfs_delete_link has enough information
+ * to successfully delete symlinks in tagged directories.
+ */
+void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
+ const char *name)
+{
+ sysfs_hash_and_remove(targ, kobj->sd, name);
+}
+
+/**
* sysfs_remove_link - remove symlink in object's directory.
* @kobj: object we're acting for.
* @name: name of the symlink to remove.
@@ -116,6 +131,22 @@ void sysfs_remove_link(struct kobject * kobj, const char * name)
sysfs_hash_and_remove(kobj, kobj->sd, name);
}
+/**
+ ...This patch enables tagging on every class directory if struct class
has tag_ops.
In addition device_del and device_rename were modified to use
sysfs_delete_link and sysfs_rename_link respectively to ensure
when these operations happen on devices whos classes have
tag_ops that they work properly.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
drivers/base/class.c | 30 ++++++++++++++++++++++++++----
drivers/base/core.c | 45 ++++++++++++++++++++++++---------------------
include/linux/device.h | 2 ++
3 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 4d22226..1485e2a 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -134,6 +134,17 @@ static void remove_class_attrs(struct class * cls)
}
}
+static int class_setup_tagging(struct class *cls)
+{
+ const struct sysfs_tagged_dir_operations *tag_ops;
+
+ tag_ops = cls->tag_ops;
+ if (!tag_ops)
+ return 0;
+
+ return sysfs_enable_tagging(&cls->subsys.kobj, tag_ops);
+}
+
int class_register(struct class * cls)
{
int error;
@@ -152,11 +163,22 @@ int class_register(struct class * cls)
subsys_set_kset(cls, class_subsys);
error = subsystem_register(&cls->subsys);
- if (!error) {
- error = add_class_attrs(class_get(cls));
- class_put(cls);
- }
+ if (error)
+ goto out;
+
+ error = class_setup_tagging(cls);
+ if (error)
+ goto out_unregister;
+
+ error = add_class_attrs(cls);
+ if (error)
+ goto out_unregister;
+
+out:
return error;
+out_unregister:
+ subsystem_unregister(&cls->subsys);
+ goto out;
}
void class_unregister(struct class * cls)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index e6738bc..8663d96 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -637,8 +637,14 @@ static struct kobject * get_device_parent(struct device *dev,
return kobj;
/* or create a new class-directory at the parent device */
- return ...Okay, here's a different implementation of tagged nodes. I just compile tested it so I can guarantee it's broken. I don't think there's anything fundamentally wrong but I'm wrong fairly often, so if you find something moronic, feel free to scream at me. 1. there's no enable_tagging() or fundamental restrictions on which types of nodes can be tagged. 2. no callback. tags are set by sysfs_rename_dir_tagged(). symlinks follow the tag of its target_sd. This limits taggable node types to dirs and symlinks. 3. in all paths between the root and leaf nodes, only zero or one tagged sd exists. all children of a tagged sd have NULL s_tag but are considered tagged the same as the tagged ancestor. 4. untagged entries are visible in all supers unless there's a matching tagged entry overshadowing it. tagged entry is only visible in the matching tagged super. 5. due to #3, children in a directory which belong to the same tag can be looked up using NULL tag. Most leaf node ops don't need to be changed. 6. symlink removal is different. we either need to modify the interface to take target_sd or implement new one. actually, I think linking symlinks to target_sd and renaming/removing them automagically would be pretty good. Thanks. What do you think? JUST FOR BRAIN STORMING. DO NOT APPLY. --- fs/sysfs/bin.c | 2 fs/sysfs/dir.c | 122 ++++++++++++++++++++++++++++++++++++++++---------- fs/sysfs/file.c | 4 - fs/sysfs/group.c | 2 fs/sysfs/inode.c | 5 +- fs/sysfs/mount.c | 44 ++++++++++++++++-- fs/sysfs/symlink.c | 8 ++- fs/sysfs/sysfs.h | 11 +++- include/linux/sysfs.h | 4 + 9 files changed, 167 insertions(+), 35 deletions(-) Index: work/fs/sysfs/dir.c =================================================================== --- work.orig/fs/sysfs/dir.c +++ work/fs/sysfs/dir.c @@ -387,7 +387,7 @@ void sysfs_addrm_start(struct sysfs_addr */ int ...
Hmmm... Please fix sysfs_get_dentry() and make it return either NULL or PTR_ERR() values. Returning both is pretty confusing. Also, it would be nice if we can use the rename_prep stuff for this too but it might just be a wishful thinking. Thanks. -- tejun -
Hello, Please rename to sysfs_rename_cxt to it consistent with sysfs_get_dentry() return ERR_PTR() value. Oops, sysfs_get_dentry() implementation is wrong too. Also, please move sysfs_grab/release_supers() near this patch and add (a lot of) comments there. Other than that, I think this is as clean as this can be. Great. -- tejun -
Welcome. I will see what I can do with respect to cleaning up the names. As for the return value of sysfs_get_dentry that is tricky. In particular I have three specific cases the code needs to deal with. - We got the dentry. - We did not get the dentry because for this super block there never ever will be a dentry. - Some kind of error occurred in attempting to get the dentry. Not getting a dentry because it is impossible I am currently handling with a NULL return. I can equally use a specific error code to mean that as well. It doesn't much matter. So I guess the hunk in As long as we handle that class of error differently I really don't care. Eric -
Yeah, I think using -ENOENT is better; otherwise, my little head feels like exploding. :-) More importantly, sysfs_get_dentry() seems like it would deference ERR_PTR() value. No? -- tejun -
I'm confused where you are referring to but I will try answer this. __sysfs_get_dentry always returns a dentry or NULL if it can't find the dentry. This is the only location where could return NULL the early check Here we depend on the fact that sysfs_root is pointed to Eric -
Hmmm... dentry could be ERR_PTR(-ENOENT) here if the REMOVED flag test And then dereferenced. The REMOVED test should never succeed there, so we're probably in the clear but still the code looks a bit scary. -- tejun -
Ugh right. Now that I don't have the locking it probably makes Agreed. That is a bug. Either the removed test needs to be removed because it can't happen or that case needs to be handled. Thanks for spotting this one. Mental blind spots are the worst. Eric -
It's probably better to add @sb to __sysfs_get_dentry() here too. That will make things look clearer. Thanks. -- tejun -
A little. I just wanted to be certain I was testing the final __sysfs_get_dentry logic as soon as I could. Eric -
Signed-off-by: Tejun Heo <htejun@gmail.com> -- tejun -
sysfs_mutex is being grabbed twice and unlocked twice later. -- tejun -
Ugh. That patch was supposed to remove the inner grab and release of sysfs_mutex. I guess I somehow failed to move that change all of the way down to this patch. #18 gets it right for sysfs_move_dir. And #20 fixes it but yes this patch is broken, and will mess up any git-bisect badly. So this patch needs to be respun. Eric -
Oh.. you're killing lookup_one_len_kern(). In that case, I think the previous one is okay too. Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
This is virtually identical to mutex_lock(&parent_dentry->d_inode->i_mutex); dentry = lookup_one_len_kern(cur->s_name, parent_dentry, strlen(cur->s_name)); mutex_unlock(&parent_dentry->d_inode->i_mutex); right? I don't think we need to duplicate the code here. Or is it needed for later multi-sb thing? -- tejun -
Right. We can do that as well. In practice in working code there is no real difference. There is a little extra uniformity in rolling it ourselves, but not enough to worry about either way. In the review/debug etc cycle it just wound up being a lot easier to roll the code myself. By the time we get to lookup_one_len_kern it is almost impenetrable code in namei.c where sysfs_add_dentry tends is easier to comprehend, and to modify for debugging. Eric -
Yeap, agreed. I agreed with this one too in the comment for the next patch. I guess I should have wrote here too. Sorry about the trouble. Thanks. -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> It might be better to have sysfs_get_dentry_locked() and sysfs_get_dentry() so that the latter can be used from sysfs_update/chmod_file(). -- tejun -
Hello, Eric. Yeah, it's a theoretical issue. Rename/move implementation has always depended on the parent structure not changing beneath it, but it's nice LOCKING describes what locks should be held when entering the function, so proper description would be something like... Kernel thread context, grabs sysfs_rename_mutex Thanks. -- tejun -
Oops, forget about the above. Thought the comment was added to sysfs_rename_dir(). -- tejun -
Maybe. I didn't feel any loss in when I was writing the code. Well this is weird in that it should be on sysfs_get_dentry For rename_dir and move_dir yes. I was updating the rules for sysfs_get_dentry. Which really wants it's parents to hold that lock. Eric -
Acked-by: Tejun Heo <htejun@gmail.com> Nice clean up. Thanks. -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Great, Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -- tejun -
Acked-by: Tejun Heo <htejun@gmail.com> -
Acked-by: Tejun Heo <htejun@gmail.com> -
On Tue, 07 Aug 2007 15:06:21 -0600, My udev failed to create /dev/dasd* so it cannot mount root :( I'm currently trying to find out what causes this, may take some time... -
Oh weird. No great surprise that something goofed up given how many patches were involved. Still there shouldn't have been any user visible differences in the patchset. If you can narrow down which patch caused the problem that would be great. Otherwise I guess I will have to start looking at the code and trying to guess what might have triggered this. Eric -
JFYI, your patchset works fine on my x86-64 test machine. Corenelia tests on a s390 system which does rename/move stuff with its devices and usually gets hit first if something is wrong. I'll scream if I find anything suspicious while reviewing. Thanks. -- tejun -
On Wed, 08 Aug 2007 01:47:51 -0600, Got it: It's patch 6, the readdir simplification. (The udev on that guest is ancient (063)...) -
Ok. That is weird. Does it depend on the order in which the dentries are returned from readdir? Unless I made a really stupid error otherwise the two versions of readdir should have the same semantics. Eric -
On Wed, 08 Aug 2007 01:57:07 -0600, More weirdness. If I activate another dasd from the repair file system, /dev/dasdb is created... Same if I set the card reader online: /dev/vmrdr-0.0.000c is created as Yes, your patch looks sane. I have no idea why it breaks stuff... -
Are you sure it's patch 6? Patch 17 adds a deadlock in rename path. -- tejun -
On Wed, 08 Aug 2007 17:54:44 +0900, Yes. If I apply the series up to patch 5, everything's fine. If I apply patch 6, /dev/dasda* is missing. -
On Wed, 8 Aug 2007 10:37:59 +0200,
OK, it seems that it is udevstart that has problems. (Normal udev
operation seems fine; when I trigger uevents manually, the device nodes
are created.)
I ran strace on udevstart and found that it skipped
/sys/block/dasd/dasda1 (interestingly, /dev/dasda was created after I
ran udevstart manually; no idea why this didn't happen on boot. Further
manual runs don't create /dev/dasda1, however.) ls doesn't produce
different output on the two kernels.
Here is some excerpt from the strace run with the broken kernel:
open("/sys/block", O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_DIRECTORY) = 3
fstat64(3, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
fcntl64(3, F_SETFD, FD_CLOEXEC) = 0
getdents64(3, /* 28 entries */, 4096) = 800
_llseek(3, 3414, [3414], SEEK_SET) = 0
stat64("/sys/block/dasda/dev", {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0
open("/sys/block/dasda", O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_DIRECTORY) = 4
fstat64(4, {st_mode=S_IFDIR|0755, st_size=0, ...}) = 0
fcntl64(4, F_SETFD, FD_CLOEXEC) = 0
getdents64(4, /* 15 entries */, 4096) = 440
_llseek(4, 3079, [3079], SEEK_SET) = 0
stat64("/sys/block/dasda/uevent/dev", 0x7fb8c3f8) = -1 ENOTDIR (Not a directory)
stat64("/sys/block/dasda/dev/dev", 0x7fb8c3f8) = -1 ENOTDIR (Not a directory)
stat64("/sys/block/dasda/range/dev", 0x7fb8c3f8) = -1 ENOTDIR (Not a directory)
stat64("/sys/block/dasda/removable/dev", 0x7fb8c3f8) = -1 ENOTDIR (Not a directory)
stat64("/sys/block/dasda/size/dev", 0x7fb8c3f8) = -1 ENOTDIR (Not a directory)
stat64("/sys/block/dasda/stat/dev", 0x7fb8c3f8) = -1 ENOTDIR (Not a directory)
stat64("/sys/block/dasda/capability/dev", 0x7fb8c3f8) = -1 ENOTDIR (Not a directory)
stat64("/sys/block/dasda/device/dev", 0x7fb8c3f8) = -1 ENOENT (No such file or d irectory)
stat64("/sys/block/dasda/subsystem/dev", 0x7fb8c3f8) = -1 ENOENT (No such file or directory)
stat64("/sys/block/dasda/holders/dev", 0x7fb8c3f8) = -1 ENOENT (No such file or ...Does the attached patch happen to fix the problem? -- tejun
On Wed, 08 Aug 2007 23:35:36 +0900, Indeed it does; thanks! -
Yeah, you seem to have 32bit off_t. UINT_MAX overflows, so... -- tejun -
Weird. And we have it opening the directory O_LARGEFILE. I have no problems with the fix though. Eric -
It's probably because of struct dirent definition used by readdir(). It may differ depending on archs and glibc version but IIRC the backend implementation of all directory listing functions in glibc were the same. It opens with O_LARGEFILE and use getdents64 to get full info then clip it to whatever limit the called API imposes. -- tejun -
More specifically, d_off field. It's a bit twisted. For the last entry, filp->f_pos gets written into the field and gets wrapped while being copied out to userland or in glibc. -- tejun -
That could do it, and glibc is crunching it. Oh well, it is easy enough to avoid as long as our inode numbers are small which the idr allocator seems to ensure. Eric -
Hello, Yeah, now I think about it. glibc throws out entries which don't fit in the data structure specified by the called API, so it probably threw out the last entry which has UINT_MAX in d_off which doesn't fit in the readdir() return structure. Using INT_MAX should be just fine as IDA always allocates the first empty slot. We can add paranoia check in ino allocation path. -- tejun -
Sounds reasonable. Annoying but reasonable. Eric -
On Wed, 08 Aug 2007 23:55:29 +0900, <checks> Yes, that was an 31 bit guest. (Good thing I tried it there, I usually use 64 bit guests...) -
On Wed, 8 Aug 2007 16:50:27 +0200, And the whole patchset seems to work as well. I did my usual attach/detach/move stuff, and didn't see any problems. -
Make sysfs_add_one() check for duplicate entry and return -EEXIST if
such entry exists. This simplifies node addition code a bit.
This patch doesn't introduce any noticeable behavior change.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
fs/sysfs/dir.c | 26 +++++++++++++++++---------
fs/sysfs/file.c | 12 +++++-------
fs/sysfs/symlink.c | 9 +++------
fs/sysfs/sysfs.h | 2 +-
4 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8ce3ffb..91934e1 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -491,9 +491,16 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
*
* LOCKING:
* Determined by sysfs_addrm_start().
+ *
+ * RETURNS:
+ * 0 on success, -EEXIST if entry with the given name already
+ * exists.
*/
-void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
+ if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
+ return -EEXIST;
+
sd->s_parent = sysfs_get(acxt->parent_sd);
if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
@@ -502,6 +509,8 @@ void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
acxt->cnt++;
sysfs_link_sibling(sd);
+
+ return 0;
}
/**
@@ -691,6 +700,7 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
+ int rc;
/* allocate */
sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
@@ -700,17 +710,15 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
/* link in */
sysfs_addrm_start(&acxt, parent_sd);
+ rc = sysfs_add_one(&acxt, sd);
+ sysfs_addrm_finish(&acxt);
- if (!sysfs_find_dirent(parent_sd, name))
- sysfs_add_one(&acxt, sd);
-
- if (!sysfs_addrm_finish(&acxt)) {
+ if (rc == ...When adding or removing a sysfs_dirent, the user used to be required
to call link/unlink separately. It was for two reasons - code looked
like that before sysfs_addrm_cxt conversion and to avoid looping
through parent_sd->children list twice during removal.
Performance optimization during removal just isn't worth it. Make
sysfs_add/remove_one() call sysfs_link/unlink_sibing() implicitly.
This makes code simpler albeit slightly less efficient. This change
doesn't introduce any noticeable behavior change.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
fs/sysfs/dir.c | 21 ++++++++++-----------
fs/sysfs/file.c | 4 +---
fs/sysfs/inode.c | 17 ++++-------------
fs/sysfs/symlink.c | 4 +---
fs/sysfs/sysfs.h | 2 --
5 files changed, 16 insertions(+), 32 deletions(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 0fe6aa3..8ce3ffb 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -30,7 +30,7 @@ static DEFINE_IDA(sysfs_ino_ida);
* Locking:
* mutex_lock(sysfs_mutex)
*/
-void sysfs_link_sibling(struct sysfs_dirent *sd)
+static void sysfs_link_sibling(struct sysfs_dirent *sd)
{
struct sysfs_dirent *parent_sd = sd->s_parent;
@@ -49,7 +49,7 @@ void sysfs_link_sibling(struct sysfs_dirent *sd)
* Locking:
* mutex_lock(sysfs_mutex)
*/
-void sysfs_unlink_sibling(struct sysfs_dirent *sd)
+static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
{
struct sysfs_dirent **pos;
@@ -500,6 +500,8 @@ void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
inc_nlink(acxt->parent_inode);
acxt->cnt++;
+
+ sysfs_link_sibling(sd);
}
/**
@@ -521,7 +523,9 @@ void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
*/
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
- BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
+ BUG_ON(sd->s_flags & ...