Re: [PATCH 3/5] Module: check to see if we have a built in module with the same name

Previous thread: cups slow on linux-2.6.24 by Jeff Chua on Sunday, January 27, 2008 - 7:18 pm. (1 message)

Next thread: 2.6.24 regression: Wake On Lan in sky2 broken on Mac mini by Tino Keitel on Sunday, January 27, 2008 - 8:29 pm. (10 messages)
To: Linus Torvalds <torvalds@...>, Andrew Morton <akpm@...>
Cc: <linux-kernel@...>, Kay Sievers <kay.sievers@...>, Jeremy Fitzhardinge <jeremy@...>, Alexander van Heukelum <heukelum@...>, Yinghai Lu <yhlu.kernel@...>, Ingo Molnar <mingo@...>, Paul Mackerras <paulus@...>
Date: Sunday, January 27, 2008 - 7:37 pm

Here's 5 patches against your current git tree that fix all of the
reported breakages due to the driver core patch merge.

They fix the following issues:
- build breakage with CONFIG_BLOCK=n
- strange traceback messages when loading a module that is already
built into the kernel.
- ppc vio code build warning and link error fix.
- oops fix for mce_amd_64 when running on some variants of multi-core
AMD boxes.

Please pull from:
master.kernel.org:/pub/scm/linux/kernel/git/gregkh/driver-2.6.git/

Patches will be sent as a follow-on to this message to lkml for people
to see.

thanks,

greg k-h

------------

arch/powerpc/kernel/vio.c | 13 +++-------
arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 3 +-
drivers/base/bus.c | 41 ++++++++++++++++++++++---------
drivers/base/class.c | 2 +-
drivers/base/core.c | 30 ++++++++++++++--------
include/linux/device.h | 3 ++
kernel/module.c | 10 +++++++
7 files changed, 68 insertions(+), 34 deletions(-)

---------------

Greg Kroah-Hartman (4):
Driver core: Fix up build when CONFIG_BLOCK=N
x86: fix runtime error in arch/x86/kernel/cpu/mcheck/mce_amd_64.c
Module: check to see if we have a built in module with the same name
Driver core: add bus_find_device_by_name function

Paul Mackerras (1):
PPC: Fix powerpc vio_find_name to not use devices_subsys

--

To: <linux-kernel@...>
Cc: Paul Mackerras <paulus@...>, Greg Kroah-Hartman <gregkh@...>
Date: Sunday, January 27, 2008 - 7:38 pm

From: Paul Mackerras <paulus@samba.org>

This fixes vio_find_name() in arch/powerpc/kernel/vio.c, which is
currently broken because it tries to use devices_subsys. That is bad
for two reasons: (1) it's doing (or trying to do) a scan of all
devices when it should only be scanning those on the vio bus, and
(2) devices_subsys was an internal symbol of the device system code
which was never meant for external use and has now gone away, and
thus the kernel fails to compile on pSeries.

The new version uses bus_find_device_by_name() on the vio bus
(vio_bus_type).

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
arch/powerpc/kernel/vio.c | 13 ++++---------
1 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 19a5656..f0bad70 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -37,8 +37,6 @@
#include <asm/iseries/hv_call_xm.h>
#include <asm/iseries/iommu.h>

-extern struct kset devices_subsys; /* needed for vio_find_name() */
-
static struct bus_type vio_bus_type;

static struct vio_dev vio_bus_device = { /* fake "parent" device */
@@ -361,19 +359,16 @@ EXPORT_SYMBOL(vio_get_attribute);
#ifdef CONFIG_PPC_PSERIES
/* vio_find_name() - internal because only vio.c knows how we formatted the
* kobject name
- * XXX once vio_bus_type.devices is actually used as a kset in
- * drivers/base/bus.c, this function should be removed in favor of
- * "device_find(kobj_name, &vio_bus_type)"
*/
-static struct vio_dev *vio_find_name(const char *kobj_name)
+static struct vio_dev *vio_find_name(const char *name)
{
- struct kobject *found;
+ struct device *found;

- found = kset_find_obj(&devices_subsys, kobj_name);
+ found = bus_find_device_by_name(&vio_bus_type, NULL, name);
if (!found)
return NULL;

- return to_vio_dev(container_of(found, struct device, kobj));
+ return to_vio...

To: <linux-kernel@...>
Cc: Greg Kroah-Hartman <gregkh@...>, Paul Mackerras <paulus@...>
Date: Sunday, January 27, 2008 - 7:38 pm

The driver core, and some other parts of the kernel just want to find a
device based on a name for a specific bus. Give them a simple wrapper
to prevent them from having to always roll their own.

This will be used in the PPC patch later in this series.

Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/base/bus.c | 41 +++++++++++++++++++++++++++++------------
include/linux/device.h | 3 +++
2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index f484495..055989e 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -163,15 +163,6 @@ static struct kset *bus_kset;

#ifdef CONFIG_HOTPLUG
/* Manually detach a device from its associated driver. */
-static int driver_helper(struct device *dev, void *data)
-{
- const char *name = data;
-
- if (strcmp(name, dev->bus_id) == 0)
- return 1;
- return 0;
-}
-
static ssize_t driver_unbind(struct device_driver *drv,
const char *buf, size_t count)
{
@@ -179,7 +170,7 @@ static ssize_t driver_unbind(struct device_driver *drv,
struct device *dev;
int err = -ENODEV;

- dev = bus_find_device(bus, NULL, (void *)buf, driver_helper);
+ dev = bus_find_device_by_name(bus, NULL, buf);
if (dev && dev->driver == drv) {
if (dev->parent) /* Needed for USB */
down(&dev->parent->sem);
@@ -206,7 +197,7 @@ static ssize_t driver_bind(struct device_driver *drv,
struct device *dev;
int err = -ENODEV;

- dev = bus_find_device(bus, NULL, (void *)buf, driver_helper);
+ dev = bus_find_device_by_name(bus, NULL, buf);
if (dev && dev->driver == NULL) {
if (dev->parent) /* Needed for USB */
down(&dev->parent->sem);
@@ -250,7 +241,7 @@ static ssize_t store_drivers_probe(struct bus_type *bus,
{
struct device *dev;

- dev = bus_find_device(bus, NULL, (void *)buf, driver_helper);
+ dev = bus_find_device_by_name(bus, NULL, buf)...

To: <linux-kernel@...>
Cc: Greg Kroah-Hartman <gregkh@...>, Rusty Russell <rusty@...>, Linus Torvalds <torvalds@...>, Andrew Morton <akpm@...>
Date: Sunday, January 27, 2008 - 7:38 pm

When trying to load a module with the same name as a built-in one, a
scary kobject backtrace comes up. Prevent that from checking for this
condition and warning the user as to what exactly is going on.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
kernel/module.c | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 1bb4c5e..76ddc85 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1214,6 +1214,7 @@ void module_remove_modinfo_attrs(struct module *mod)
int mod_sysfs_init(struct module *mod)
{
int err;
+ struct kobject *kobj;

if (!module_sysfs_initialized) {
printk(KERN_ERR "%s: module sysfs not initialized\n",
@@ -1221,6 +1222,15 @@ int mod_sysfs_init(struct module *mod)
err = -EINVAL;
goto out;
}
+
+ kobj = kset_find_obj(module_kset, mod->name);
+ if (kobj) {
+ printk(KERN_ERR "%s: module is already loaded\n", mod->name);
+ kobject_put(kobj);
+ err = -EINVAL;
+ goto out;
+ }
+
mod->mkobj.mod = mod;

memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
--
1.5.3.8

--

To: Greg Kroah-Hartman <gregkh@...>
Cc: <linux-kernel@...>, Linus Torvalds <torvalds@...>, Andrew Morton <akpm@...>
Date: Tuesday, January 29, 2008 - 2:20 am

Oh, I pushed this as part of my module updates.

Unfortunately Andrew still doesn't seem to have picked up my patch queue, and
keeps grabbing random (sometimes outdated) patches which are also in my
tree :(

Cheers,
Rusty.
--

To: Greg Kroah-Hartman <gregkh@...>
Cc: <linux-kernel@...>, Rusty Russell <rusty@...>, Linus Torvalds <torvalds@...>, Andrew Morton <akpm@...>
Date: Monday, January 28, 2008 - 7:54 pm

Should not external modules with internal names be rejected at modprobe
time? Otherwise I'd wonder how you want to deal with /sys/modules/XXX if
both modules export some module_param()s.

It's just that if I happen to load vt.ko that the existing
/sys/modules/vt (from in-kernel vt.o) does not get overwritten by new
dentries that vt.ko will spawn. Something like /sys/modules/vt.1 perhaps
for /the new module with same name?
--

To: <linux-kernel@...>
Cc: Greg Kroah-Hartman <gregkh@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Ingo Molnar <mingo@...>
Date: Sunday, January 27, 2008 - 7:38 pm

This problem is due to the kobject rework recently done in this file.

The mce_amd_64.c code uses some wierd forward calls to back out of the
recursive way the code creates kobjects. Because of this, we need to
verify that we have really created a kobject before calling
kobject_uevent().

Many thanks to Yinghai Lu <yhlu.kernel@gmail.com> for reporting the
problem and testing.

Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Jacob Shin <jacob.shin@amd.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 7535887..073afa7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -450,7 +450,8 @@ recurse:
if (err)
goto out_free;

- kobject_uevent(&b->kobj, KOBJ_ADD);
+ if (b)
+ kobject_uevent(&b->kobj, KOBJ_ADD);

return err;

--
1.5.3.8

--

To: Greg Kroah-Hartman <gregkh@...>
Cc: <linux-kernel@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Linus Torvalds <torvalds@...>, Alexander Viro <viro@...>
Date: Monday, January 28, 2008 - 8:24 am

Acked-by: Ingo Molnar <mingo@elte.hu>

and please let me insert a minor kobject rant here: i do think it's way
too hard to figure out relatively minor-looking kobj bugs like this. It
took serious dedication from Yinghai Lu and yourself to nail this down,
and we wont always have that much luck in the future.

CONFIG_DEBUG_KOBJECT is way too feeble and does not actually attempt to
catch bugs like this. The effects of the bug were quite serious, and
this hasnt been the first time such hard-to-find kobj bugs occured -
every one of those incidents was in essence unnecessary.

Couldnt DEBUG_KOBJECT do better than this - just like we have list
debugging, PAGEALLOC and all the other debug checks?

Ingo
--

To: Ingo Molnar <mingo@...>
Cc: <linux-kernel@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Linus Torvalds <torvalds@...>, Alexander Viro <viro@...>
Date: Monday, January 28, 2008 - 1:37 pm

In fact, it is exactly what enabled me to catch this bug. The fact that
the kobject_uevent() debugging statment did not print out anything, and
oopsed, showed me that this was a simpler problem than I was originally
thinking it was.

Problem was we were passing in 0x18 for a kobject pointer to the
kobject_uevent() function. Even checking for NULL there would not have
caught this.

I have added a lot more "robustness" checks to the kobject core now, see
the lkml messages about the maple bus for examples of where it is
catching real problems already. And the kobject debugging code is now
"unified", printing out everything in a standard, easy to understand
manner.

I really don't want to get into adding a "magic value" to a kobject
field, and then checking it for every call, that too would have died on
this kind of bug, just like the debug printk did :)

But if there's anything that you think I can add to make it easier to
understand, please let me know.

The root problem of this bug was us using a goto to call forward into a
major code block within a function. Not just using a goto for error
cleanup, like the kernel code "normally" does. Because of that, I
totally missed this code path when reading the function many times.
It's nasty code complexity for no reason at all that causes more
problems than is needed, combined with a total lack of documentation for
how this kobject userspace interface is supposed to be used, that needs
to be fixed.

thanks,

greg k-h
--

To: Greg KH <gregkh@...>
Cc: <linux-kernel@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Linus Torvalds <torvalds@...>, Alexander Viro <viro@...>
Date: Monday, January 28, 2008 - 1:57 pm

anything that just causes the function to die on that bug reliably
during normal use, without having to enable DEBUG_KOBJECT which just
kills the system due to its verbosity. A magic value would be perfect.
Are kobjects protected against accidental copying? If not add &kobj to
the 'magic value' too, and check that - it becomes copying-resistent

no argument about that at all! This is exactly the same problem that the
spinlock/mutex/rwlock/etc. APIs were facing: it's used everywhere, and
that means dubious places as well that are not that well-known and are
rarely used. The more widely used a piece of kernel infrastructure is,
the more 'hardened' it must be against intentional or accidental abuse.
(at least with certain magic debug options enabled)

So please regard this a good thing - obscure APIs need no debugging
infrastructure - widely used ones do need quite extensive debugging
infrastructure.

For example locks currently have 4000 lines of code of debugging
infrastructure and 1500 lines of code of self-tests. The total amount of
core locking code is less 1000 lines of code. So for every line of
locking code there's more than 5 lines of debugging infrastructure (!).
And we only print to the console if we think there is a bug. That
extensive infrastructure is _good_, because locks are so central to all
our data structures that catching bugs as soon as possible (in fact
sooner than they trigger) aids in keeping bad code out of the kernel
ASAP.

Ingo
--

To: Ingo Molnar <mingo@...>
Cc: <linux-kernel@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Linus Torvalds <torvalds@...>, Alexander Viro <viro@...>
Date: Monday, January 28, 2008 - 2:32 pm

But for this case, how would we test that 0x18 is passed in for a

Oh, that's a very cool idea, I like it :)

So I guess the complaint is that CONFIG_KOBJECT is too verbose? I can
understand that, I'll look to clean things up as we have a lot of
"tracing" type functions in the debug output. Hm, I wonder if the new

Heh, good point, I'll look into fixing this up to be more useful to the
"normal" developer, and not just the two of us doing kobject core
development :)

thanks,

greg k-h
--

To: Greg KH <gregkh@...>
Cc: <linux-kernel@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Linus Torvalds <torvalds@...>, Alexander Viro <viro@...>
Date: Monday, January 28, 2008 - 3:01 pm

hey, you are welcome :-)

[ I guess i should not mention that i've implemented list debugging for
Linux that checksums the struct list contents and stores the checksum
in it (offset by a magic value plus to address of the list head), and
thus protects it against accidental corruption? It was capable of
reliably detecting mixed up list_add() arguments for example, it
detected list corruption of _every_ sort, it detected double
list_del() and list_add() of an already active list member as well. It
was even capable of detecting SMP races: two parallel unserialized
list_del()'s on the same list head were detected and warned about as

that would certainly be helpful and nice as a second line of defense.
(as a second line of defense the DEBUG_KOBJECT stuff was pretty
effective as well, as you have shown it with this bug.)

but the "first line of defense" must be an as automatically and as
transparently fault-resilient data structure as humanly possible.

"please enable DEBUG_KOBJECT and send me the logs" reduces the active
tester base to somewhere around 0.1% of the full tester base - and if
the bug is in any way spurious, the tester base goes down to 0.001% or
down to outright zero.

i really think that in terms of central kernel data structures, no
amount of debugging infrastructure is "too much" (within taste limits) -
as long as it's cleanly structured. Even 10,000 lines of debugging code

you are right, that is another important argument: meaningful debugging
checks that catch difficult to debug problems speed up development and
allows one to iterate the codebase faster. As long as the debugging
checks are transparent (i.e. they have no outrageous false positive rate
and they do not spam the logs), distros and testers will happily enable
it and you'll get much more feedback than you ever imagined would be
possible.

Time is precious, even if you know what you are doing, so the more of
the debugging you are capable of pushing to the comp...

To: Ingo Molnar <mingo@...>
Cc: Greg KH <gregkh@...>, <linux-kernel@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Linus Torvalds <torvalds@...>, Alexander Viro <viro@...>
Date: Monday, January 28, 2008 - 3:42 pm

[Ingo Molnar - Mon, Jan 28, 2008 at 08:01:49PM +0100]
|
| * Greg KH <gregkh@suse.de> wrote:
|
| > > Are kobjects protected against accidental copying? If not add &kobj
| > > to the 'magic value' too, and check that - it becomes
| > > copying-resistent that way and has the same cost to check. (which is
| > > negligible anyway)
| >
| > Oh, that's a very cool idea, I like it :)
|
| hey, you are welcome :-)
|
| [ I guess i should not mention that i've implemented list debugging for
| Linux that checksums the struct list contents and stores the checksum
| in it (offset by a magic value plus to address of the list head), and
| thus protects it against accidental corruption? It was capable of
| reliably detecting mixed up list_add() arguments for example, it
| detected list corruption of _every_ sort, it detected double
| list_del() and list_add() of an already active list member as well. It
| was even capable of detecting SMP races: two parallel unserialized
| list_del()'s on the same list head were detected and warned about as
| well. I guess i should release it one of these days? =B-) ]
|

did miss some words while reading this... ;)
really sorry, drop the mail i sent please

- Cyrill -
--

To: Ingo Molnar <mingo@...>
Cc: Greg KH <gregkh@...>, <linux-kernel@...>, Yinghai Lu <yhlu.kernel@...>, Jacob Shin <jacob.shin@...>, Linus Torvalds <torvalds@...>, Alexander Viro <viro@...>
Date: Monday, January 28, 2008 - 3:29 pm

[Ingo Molnar - Mon, Jan 28, 2008 at 08:01:49PM +0100]
|
| * Greg KH <gregkh@suse.de> wrote:
|
| > > Are kobjects protected against accidental copying? If not add &kobj
| > > to the 'magic value' too, and check that - it becomes
| > > copying-resistent that way and has the same cost to check. (which is
| > > negligible anyway)
| >
| > Oh, that's a very cool idea, I like it :)
|
| hey, you are welcome :-)
|
| [ I guess i should not mention that i've implemented list debugging for
| Linux that checksums the struct list contents and stores the checksum
| in it (offset by a magic value plus to address of the list head), and
| thus protects it against accidental corruption? It was capable of
| reliably detecting mixed up list_add() arguments for example, it
| detected list corruption of _every_ sort, it detected double
| list_del() and list_add() of an already active list member as well. It
| was even capable of detecting SMP races: two parallel unserialized
| list_del()'s on the same list head were detected and warned about as
| well. I guess i should release it one of these days? =B-) ]
|

interesting... something like hash checks in lockdep?

[...snip...]

- Cyrill -
--

To: <linux-kernel@...>
Cc: Greg Kroah-Hartman <gregkh@...>, Alexander van Heukelum <heukelum@...>, Jeremy Fitzhardinge <jeremy@...>, Kay Sievers <kay.sievers@...>
Date: Sunday, January 27, 2008 - 7:38 pm

This fixes up the driver core build errors when CONFIG_BLOCK=N

Thanks to Alexander van Heukelum <heukelum@mailshack.com> for the basis
of this patch, and to Jeremy Fitzhardinge <jeremy@goop.org> for
reporting the problem.

Cc: Alexander van Heukelum <heukelum@mailshack.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/base/class.c | 2 +-
drivers/base/core.c | 30 +++++++++++++++++++-----------
2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 59cf358..d916bbf 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -149,7 +149,7 @@ int class_register(struct class *cls)
if (error)
return error;

-#ifdef CONFIG_SYSFS_DEPRECATED
+#if defined(CONFIG_SYSFS_DEPRECATED) && defined(CONFIG_BLOCK)
/* let the block class directory show up in the root of sysfs */
if (cls != &block_class)
cls->subsys.kobj.kset = class_kset;
diff --git a/drivers/base/core.c b/drivers/base/core.c
index edf3bbe..b172787 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -27,9 +27,17 @@
int (*platform_notify)(struct device *dev) = NULL;
int (*platform_notify_remove)(struct device *dev) = NULL;

-/*
- * sysfs bindings for devices.
- */
+#ifdef CONFIG_BLOCK
+static inline int device_is_not_partition(struct device *dev)
+{
+ return !(dev->type == &part_type);
+}
+#else
+static inline int device_is_not_partition(struct device *dev)
+{
+ return 1;
+}
+#endif

/**
* dev_driver_string - Return a device's driver name, if at all possible
@@ -652,14 +660,14 @@ static int device_add_class_symlinks(struct device *dev)
#ifdef CONFIG_SYSFS_DEPRECATED
/* stacked class devices need a symlink in the class directory */
if (dev->kobj.parent != &dev->class->subsys.kobj &&
- dev->type != &part_type) {
+ ...

Previous thread: cups slow on linux-2.6.24 by Jeff Chua on Sunday, January 27, 2008 - 7:18 pm. (1 message)

Next thread: 2.6.24 regression: Wake On Lan in sky2 broken on Mac mini by Tino Keitel on Sunday, January 27, 2008 - 8:29 pm. (10 messages)